line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Mixi::Scraper::Plugin::ViewDiary;
|
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
727
|
use strict;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
26
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings;
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
21
|
|
5
|
1
|
|
|
1
|
|
5
|
use WWW::Mixi::Scraper::Plugin;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# Request-parameter rules handed to validator(): the word list pairs each
# parameter name with a rule name (id => is_number, owner_id => is_number).
# NOTE(review): validator() comes from WWW::Mixi::Scraper::Plugin; how the
# rules are enforced is not visible here -- confirm there.
validator {qw( id is_number owner_id is_number )};
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# scrape($html)
#
# Extracts a single diary entry and its comments from the HTML of a
# view_diary page.
#
# Parameters:
#   $self - plugin instance; must provide html_or_text() and post_process().
#   $html - page HTML as a string (passed to the scrapers by reference).
#
# Returns the post-processed entry hash reference with the keys collected
# below (time, subject, description, images, link) plus a 'comments' entry
# holding whatever post_process() returns for the scraped comment list.
sub scrape {
    my ($self, $html) = @_;

    my %scraper;

    # One photo cell: the anchor's onClick attribute and the thumbnail src.
    $scraper{images} = scraper {
        process 'a',
            link => '@onClick';
        process 'a>img',
            thumb_link => '@src';
        result qw( link thumb_link );
    };

    # The diary entry itself.  'string' captures the text of the <span>
    # nested inside the title <dt>; it is stripped from the subject later.
    $scraper{diary} = scraper {
        process 'div.viewDiaryBox>div.listDiaryTitle>dl>dd',
            time => 'TEXT';
        process 'div.viewDiaryBox>div.listDiaryTitle>dl>dt',
            subject => 'TEXT';
        process 'div.viewDiaryBox>div.listDiaryTitle>dl>dt>span',
            string => 'TEXT';
        process 'div#diary_body',
            description => $self->html_or_text;
        process 'div.diaryPhoto>table>tr>td',
            'images[]' => $scraper{images};
        process 'div.diaryPaging01>div.diaryPagingLeft>a',
            prev_link => '@href';
        result qw( time subject description images prev_link string );
    };

    my $stash = $scraper{diary}->scrape(\$html);

    # Derive this entry's link from the "previous" paging anchor: swap
    # neighbor_diary for view_diary and drop everything from
    # "&direction=prev" onwards.  The key is always assigned (possibly
    # undef), but the rewrites are skipped when the page has no such
    # anchor so that no "uninitialized value" warnings are raised.
    $stash->{link} = delete $stash->{prev_link};
    if (defined $stash->{link}) {
        $stash->{link} =~ s/neighbor_diary/view_diary/;
        $stash->{link} =~ s/&direction=prev.*//;
    }

    # post_process() is indexed as an array reference here; the first
    # element is taken as the processed entry.
    $stash = $self->post_process($stash)->[0];

    # Remove the trailing <span> text from the subject.  The text comes
    # from the scraped page, so it is quoted with \Q...\E to be matched
    # literally; otherwise regex metacharacters in it could mis-match or
    # make the pattern fail to compile.
    my $string = delete $stash->{string} || '';
    if (defined $stash->{subject} && length $string) {
        $stash->{subject} =~ s/\Q$string\E$//;
    }

    # A single comment: timestamp, author link/name and body.
    $scraper{comments} = scraper {
        process 'dl.comment>dt>span.date',
            time => 'TEXT';
        process 'dl.comment>dt>a',
            link => '@href',
            name => 'TEXT';
        process 'dl.comment>dd',
            description => $self->html_or_text;
        result qw( time link name description );
    };

    # The comment list: one entry per <li> in the comment area.
    $scraper{list} = scraper {
        process 'div.commentListArea>ul>li',
            'comments[]' => $scraper{comments};
        result qw( comments );
    };

    $stash->{comments} = $self->post_process($scraper{list}->scrape(\$html));

    return $stash;
}
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
1;
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
__END__
|