line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Mixi::Scraper::Plugin::ViewEvent;
|
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
1000
|
use strict;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
4
|
1
|
|
|
1
|
|
6
|
use warnings;
|
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
23
|
|
5
|
1
|
|
|
1
|
|
3
|
use WWW::Mixi::Scraper::Plugin;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4
|
|
6
|
1
|
|
|
1
|
|
4
|
use WWW::Mixi::Scraper::Utils qw( _uri _datetime );
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
63
|
|
7
|
1
|
|
|
1
|
|
1832
|
use utf8;
|
|
1
|
|
|
|
|
17
|
|
|
1
|
|
|
|
|
8
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
validator {qw(
|
10
|
|
|
|
|
|
|
id is_number
|
11
|
|
|
|
|
|
|
comm_id is_number
|
12
|
|
|
|
|
|
|
page is_number_or_all
|
13
|
|
|
|
|
|
|
)};
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub scrape {
|
16
|
0
|
|
|
0
|
1
|
|
my ($self, $html) = @_;
|
17
|
|
|
|
|
|
|
|
18
|
0
|
|
|
|
|
|
my %scraper;
|
19
|
|
|
|
|
|
|
$scraper{images} = scraper {
|
20
|
0
|
|
|
0
|
|
|
process 'a',
|
21
|
|
|
|
|
|
|
link => '@onClick';
|
22
|
0
|
|
|
|
|
|
process 'a>img',
|
23
|
|
|
|
|
|
|
thumb_link => '@src';
|
24
|
0
|
|
|
|
|
|
result qw( link thumb_link );
|
25
|
0
|
|
|
|
|
|
};
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
$scraper{infos} = scraper {
|
28
|
0
|
|
|
0
|
|
|
process 'dt',
|
29
|
|
|
|
|
|
|
name => 'TEXT';
|
30
|
0
|
|
|
|
|
|
process 'dd',
|
31
|
|
|
|
|
|
|
string => 'TEXT';
|
32
|
0
|
|
|
|
|
|
process 'dd>a',
|
33
|
|
|
|
|
|
|
link => '@href',
|
34
|
|
|
|
|
|
|
subject => 'TEXT';
|
35
|
0
|
|
|
|
|
|
result qw( name string link subject );
|
36
|
0
|
|
|
|
|
|
};
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
$scraper{topic} = scraper {
|
39
|
0
|
|
|
0
|
|
|
process 'dl.bbsList01>dt>span.date',
|
40
|
|
|
|
|
|
|
'time' => 'TEXT';
|
41
|
0
|
|
|
|
|
|
process 'dl.bbsList01>dt[class="bbsTitle clearfix"]>span.titleSpan',
|
42
|
|
|
|
|
|
|
'subject' => 'TEXT';
|
43
|
0
|
|
|
|
|
|
process 'dd.bbsContent>dl>dt>a',
|
44
|
|
|
|
|
|
|
'name' => 'TEXT',
|
45
|
|
|
|
|
|
|
'name_link' => '@href';
|
46
|
0
|
|
|
|
|
|
process 'dd.bbsContent>dl>dt',
|
47
|
|
|
|
|
|
|
'name_string' => 'TEXT',
|
48
|
|
|
|
|
|
|
process 'dd.bbsContent>dl>dd',
|
49
|
|
|
|
|
|
|
'description' => $self->html_or_text;
|
50
|
0
|
|
|
|
|
|
process 'div.communityPhoto>table>tr>td',
|
51
|
|
|
|
|
|
|
'images[]' => $scraper{images};
|
52
|
0
|
|
|
|
|
|
process 'dl.bbsList01>dd.bbsInfo>dl',
|
53
|
|
|
|
|
|
|
'infos[]' => $scraper{infos};
|
54
|
0
|
|
|
|
|
|
result qw( time subject name_string name name_link images infos description );
|
55
|
0
|
|
|
|
|
|
};
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
$scraper{comment_body} = scraper {
|
58
|
0
|
|
|
0
|
|
|
process 'dl.commentContent01>dt>a',
|
59
|
|
|
|
|
|
|
'name_link' => '@href',
|
60
|
|
|
|
|
|
|
'name' => 'TEXT';
|
61
|
0
|
|
|
|
|
|
process 'dl.commentContent01>dt',
|
62
|
|
|
|
|
|
|
'name_string' => 'TEXT';
|
63
|
0
|
|
|
|
|
|
process 'dl.commentContent01>dd',
|
64
|
|
|
|
|
|
|
'description' => $self->html_or_text;
|
65
|
0
|
|
|
|
|
|
process 'dl.commentContent01>dd>table>tr>td',
|
66
|
|
|
|
|
|
|
'images[]' => $scraper{images};
|
67
|
0
|
|
|
|
|
|
result qw( name_link name description images );
|
68
|
0
|
|
|
|
|
|
};
|
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$scraper{comment} = scraper {
|
71
|
0
|
|
|
0
|
|
|
process 'dl.commentList01>dt>span.date',
|
72
|
|
|
|
|
|
|
'dates[]' => 'TEXT';
|
73
|
0
|
|
|
|
|
|
process 'dl.commentList01>dt>span.senderId',
|
74
|
|
|
|
|
|
|
'sender_ids[]' => 'TEXT';
|
75
|
0
|
|
|
|
|
|
process 'dl.commentList01>dd',
|
76
|
|
|
|
|
|
|
'comments[]' => $scraper{comment_body};
|
77
|
0
|
|
|
|
|
|
result qw( dates comments sender_ids );
|
78
|
0
|
|
|
|
|
|
};
|
79
|
|
|
|
|
|
|
|
80
|
0
|
|
|
|
|
|
my $stash = $self->post_process($scraper{topic}->scrape(\$html))->[0];
|
81
|
|
|
|
|
|
|
|
82
|
0
|
0
|
0
|
|
|
|
if ($stash->{name_string} && !$stash->{name}) {
|
83
|
0
|
|
|
|
|
|
$stash->{name} = $stash->{name_string};
|
84
|
|
|
|
|
|
|
}
|
85
|
|
|
|
|
|
|
|
86
|
0
|
0
|
|
|
|
|
foreach my $item (@{ $stash->{infos} || [] }) {
|
|
0
|
|
|
|
|
|
|
87
|
0
|
0
|
|
|
|
|
if ( $item->{name} eq '開催日時' ) {
|
88
|
0
|
|
|
|
|
|
$stash->{date} = $item->{string};
|
89
|
|
|
|
|
|
|
}
|
90
|
0
|
0
|
|
|
|
|
if ( $item->{name} eq '募集期限' ) {
|
91
|
0
|
|
|
|
|
|
$stash->{deadline} = $item->{string};
|
92
|
|
|
|
|
|
|
}
|
93
|
0
|
0
|
|
|
|
|
if ( $item->{name} eq '開催場所' ) {
|
94
|
0
|
|
|
|
|
|
$stash->{location} = $item->{string};
|
95
|
|
|
|
|
|
|
}
|
96
|
0
|
0
|
|
|
|
|
if ( $item->{name} eq '参加者' ) {
|
97
|
0
|
|
|
|
|
|
$stash->{list}->{count} = $item->{string};
|
98
|
0
|
|
|
|
|
|
$stash->{list}->{link} = _uri( $item->{link} );
|
99
|
0
|
|
|
|
|
|
$stash->{list}->{subject} = $item->{subject};
|
100
|
|
|
|
|
|
|
}
|
101
|
|
|
|
|
|
|
}
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# XXX: this fails when you test with local files.
|
104
|
|
|
|
|
|
|
# However, this link cannot be extracted from the html,
|
105
|
|
|
|
|
|
|
# at least as of writing this. ugh.
|
106
|
0
|
|
|
|
|
|
$stash->{link} = $self->{uri};
|
107
|
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
|
my $stash_c = $self->post_process($scraper{comment}->scrape(\$html))->[0];
|
109
|
|
|
|
|
|
|
|
110
|
0
|
0
|
|
|
|
|
my @dates = @{ $stash_c->{dates} || [] };
|
|
0
|
|
|
|
|
|
|
111
|
0
|
0
|
|
|
|
|
my @sender_ids = @{ $stash_c->{sender_ids} || [] };
|
|
0
|
|
|
|
|
|
|
112
|
0
|
0
|
|
|
|
|
my @comments = @{ $stash_c->{comments} || [] };
|
|
0
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
|
foreach my $comment ( @comments ) {
|
114
|
0
|
|
|
|
|
|
$comment->{time} = _datetime( shift @dates );
|
115
|
0
|
|
|
|
|
|
$comment->{subject} = shift @sender_ids;
|
116
|
|
|
|
|
|
|
|
117
|
0
|
0
|
|
|
|
|
if (!$comment->{name}) {
|
118
|
0
|
|
0
|
|
|
|
$comment->{name} = $comment->{name_string} || ' ';
|
119
|
|
|
|
|
|
|
}
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# incompatible with WWW::Mixi to let comment links
|
122
|
|
|
|
|
|
|
# look more 'permanent' to make plagger/rss readers happier
|
123
|
0
|
|
|
|
|
|
$comment->{name_link} = _uri( $comment->{name_link} );
|
124
|
0
|
0
|
|
|
|
|
$comment->{link} = $stash->{link}
|
125
|
|
|
|
|
|
|
? _uri( $stash->{link} . '#' . $comment->{subject} )
|
126
|
|
|
|
|
|
|
: undef;
|
127
|
|
|
|
|
|
|
|
128
|
0
|
0
|
|
|
|
|
if ( $comment->{images} ) {
|
129
|
0
|
0
|
|
|
|
|
foreach my $image ( @{ $comment->{images} || [] } ) {
|
|
0
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
$image->{link} = _uri( $image->{link} );
|
131
|
0
|
|
|
|
|
|
$image->{thumb_link} = _uri( $image->{thumb_link} );
|
132
|
|
|
|
|
|
|
}
|
133
|
|
|
|
|
|
|
}
|
134
|
|
|
|
|
|
|
}
|
135
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
|
$stash->{comments} = \@comments;
|
137
|
|
|
|
|
|
|
|
138
|
0
|
|
|
|
|
|
return $stash;
|
139
|
|
|
|
|
|
|
}
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
1;
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
__END__
|