line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ABSTRACT: https://www.jjwxc.net |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=pod |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=encoding utf8 |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 FUNCTION |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head2 make_query_request |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
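# $type (passed as the query_type option) is one of: 作品 (work title), 作者 (author), 主角 (lead character), 配角 (supporting character), 其他 (other); it defaults to 作品 |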
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$parser->make_query_request( $keyword, query_type => $type ); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
package Novel::Robot::Parser::jjwxc; |
18
|
2
|
|
|
2
|
|
12
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
54
|
|
19
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
51
|
|
20
|
2
|
|
|
2
|
|
7
|
use utf8; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
10
|
|
21
|
2
|
|
|
2
|
|
42
|
use base 'Novel::Robot::Parser'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
224
|
|
22
|
|
|
|
|
|
|
|
23
|
2
|
|
|
2
|
|
12
|
use Web::Scraper; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
14
|
|
24
|
2
|
|
|
2
|
|
164
|
use Encode; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
214
|
|
25
|
2
|
|
|
2
|
|
13
|
use Data::Dumper; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
3117
|
|
26
|
|
|
|
|
|
|
|
27
|
0
|
|
|
0
|
0
|
0
|
# Root URL of the desktop site. Other methods in this package prepend it to
# site-relative paths via $self->base_url().
sub base_url {
    return 'https://www.jjwxc.net';
}
28
|
|
|
|
|
|
|
|
29
|
0
|
|
|
0
|
0
|
0
|
# Bare domain name handled by this parser.
sub domain {
    return 'jjwxc.net';
}
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Translate a novel index URL into the mobile-site URL requested for parsing.
#
#   $index_url - URL expected to contain "novelid=<digits>"
#
# Returns "https://m.jjwxc.net/book2/<id>?more=0&whole=1" when a true-valued
# novelid is captured; otherwise returns $index_url unchanged.
sub generate_novel_url {
    my ( $self, $index_url ) = @_;

    if ( $index_url =~ m#novelid=(\d+)# ) {
        my $id = $1;
        return "https://m.jjwxc.net/book2/$id?more=0&whole=1" if $id;
    }

    return $index_url;
}
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Parse a novel index page into { book, writer, item_list }.
#
#   $h - scalar reference to the page text (dereferenced as $$h below); the
#        text is modified in place by the first substitution.
#
# Returns a hashref; item_list is an arrayref of { id, title, url } built
# from consecutive (url, title) pairs in @f.
#
# NOTE(review): several patterns below look truncated -- presumably the
# markup fragments they contained were lost when this listing was extracted.
# Confirm against the real module source before relying on them.
sub parse_novel { |
39
|
2
|
|
|
2
|
0
|
7
|
my ( $self, $h ) = @_; |
40
|
2
|
|
|
|
|
180
|
# Discard everything from this marker text through the end of the page.
$$h =~ s#本书霸王票读者排行.*##s; |
41
|
|
|
|
|
|
|
|
42
|
2
|
|
|
|
|
6
|
my %r; |
43
|
2
|
|
|
|
|
22
|
|
# Book title: the shortest text enclosed in the first pair of book-title marks.
( $r{book} ) = $$h =~ m#\s*《(.+?)》#s; |
44
|
|
|
|
|
|
|
|
45
|
2
|
|
|
|
|
22
|
|
# NOTE(review): as written, the lazy group has nothing after it, so it
# captures exactly one character following the label -- likely truncated.
($r{writer}) = $$h=~m#作者:(.+?)#s; |
46
|
|
|
|
|
|
|
|
47
|
2
|
|
|
|
|
55
|
|
# NOTE(review): this pattern has an unclosed "(" as shown and cannot compile;
# $cc is meant to hold the chapter-list portion of the page.
my ( $cc ) = $$h =~ m#章节列表: .+?(#s; |
48
|
2
|
|
|
|
|
57
|
|
# The loop below treats @f as alternating entries: even index = URL path,
# odd index = chapter title.
my @f = $cc =~ m#(.+?)#sg; |
49
|
2
|
|
|
|
|
15
|
|
# Number of (url, title) pairs.
my $max_chapter_num = ( $#f + 1 ) / 2; |
50
|
2
|
|
|
|
|
8
|
|
for my $i ( 1 .. $max_chapter_num ) { |
51
|
12
|
|
|
|
|
21
|
|
# Index of the title element of pair $i.
my $j = 2 * $i - 1; |
52
|
12
|
|
|
|
|
30
|
|
my $t = $f[$j]; |
53
|
12
|
|
|
|
|
38
|
|
# Strip a leading "<digits>." and any spaces after it.
$t =~ s/^\d+\.( )*//; |
54
|
12
|
|
|
|
|
51
|
|
# NOTE(review): replaces a space with a space as shown (no-op); the original
# probably targeted an entity or non-breaking space -- confirm.
$t =~ s/ / /g; |
55
|
12
|
|
|
|
|
54
|
|
# Remove everything up to and including the last ">" on the first line.
$t =~ s/^.+>//; |
56
|
12
|
|
|
|
|
64
|
|
# Collapse each whitespace run to a single space.
$t =~ s/\s+/ /g; |
57
|
|
|
|
|
|
|
|
58
|
12
|
|
|
|
|
20
|
|
# Index of the URL element of pair $i.
my $ui = 2 * $i - 2; |
59
|
12
|
|
|
|
|
23
|
|
# Chapter URL: the captured path appended to the mobile host.
my $u = "https://m.jjwxc.net$f[$ui]"; |
60
|
12
|
|
|
|
|
16
|
|
push @{ $r{item_list} }, { id => $i, title => $t, url => $u }; |
|
12
|
|
|
|
|
41
|
|
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
2
|
|
|
|
|
38
|
|
return \%r; |
64
|
|
|
|
|
|
|
} ## end sub parse_novel |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Extract the body text of a single chapter page.
#
#   $h - scalar reference to the page text (dereferenced as $$h below)
#
# Returns { content => <captured text> }, with content set to '' when the
# capture is empty, undefined or otherwise false.
sub parse_novel_item { |
67
|
2
|
|
|
2
|
0
|
8
|
my ( $self, $h ) = @_; |
68
|
|
|
|
|
|
|
|
69
|
2
|
|
|
|
|
233
|
|
# NOTE(review): this pattern looks truncated -- presumably the tag names
# around "]+>" and "]*>" were lost in extraction; confirm against the real
# module source.
my ( $c ) = $$h =~ m#]+>.+?]*>(.+?)#s; |
70
|
|
|
|
|
|
|
|
71
|
2
|
|
|
50
|
|
|
|
20
|
|
return { content => $c || '' }; |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Extract the writer name from a writer ("board") page.
#
#   $h - page content handed straight to Web::Scraper's scrape()
#
# Returns { writer => <text> }, taken from the first <b> element found under
# a <tr valign="bottom"> and passed through $self->tidy_string().
sub parse_board {
    my ( $self, $h ) = @_;

    my $writer_scraper = scraper {
        process_first '//tr[@valign="bottom"]//b', writer => 'TEXT';
    };

    my $scraped = $writer_scraper->scrape( $h );
    $self->tidy_string( $scraped, 'writer' );

    my %board = ( writer => $scraped->{writer} );
    return \%board;
}
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Collect every book listed on a writer ("board") page.
#
#   $h - page content handed straight to Web::Scraper's scrape()
#
# Returns an arrayref of book hashrefs as produced by
# parse_writer_book_info(), each with a `writer` key added. Only books whose
# url matches /onebook/ are kept.
sub parse_board_item {
    my ( $self, $h ) = @_;

    my @book_list;

    # Series label applied to books until a series-heading row replaces it;
    # the initial label reads "uncategorized".
    my $series = '未分类';

    my $parse_writer = scraper {

        # Same writer rule as parse_board(). Without it the scrape result has
        # no `writer` key and every book ends up with writer => undef.
        process_first '//tr[@valign="bottom"]//b', writer => 'TEXT';

        # Rows are visited in document order; $series carries over from one
        # row to the next through the closure.
        process '//tr[@bgcolor="#eefaee"]', 'book_list[]' => sub {
            my ( $tr ) = @_;

            $series = $self->parse_writer_series_name( $tr, $series );

            my $book = $self->parse_writer_book_info( $tr, $series );
            push @book_list, $book if ( $book and $book->{url} =~ /onebook/ );
        };
    };

    my $ref = $parse_writer->scrape( $h );
    $self->tidy_string( $ref, 'writer' );

    $_->{writer} = $ref->{writer} for @book_list;

    return \@book_list;
} ## end sub parse_board_item
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Work out which series name applies after looking at one table row.
#
#   $tr     - row element offering look_down() and as_trimmed_text()
#   $series - series name currently in effect
#
# Returns the text between the lenticular brackets when the row contains a
# colspan="7" element and its trimmed text has such a bracketed part;
# otherwise returns $series unchanged.
sub parse_writer_series_name {
    my ( $self, $tr, $series ) = @_;

    if ( $tr->look_down( 'colspan', '7' ) ) {
        my ( $name ) = $tr->as_trimmed_text =~ /【(.*)】/;
        $series = $name if defined $name;
    }

    return $series;
}
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Build the book record for one row of a writer's works table.
#
#   $tr     - row element offering look_down() (HTML::Element-style API)
#   $series - series name to record on the book
#
# Returns a hashref { series, book, url }, or nothing (bare return) when the
# row has no <a> element or that element has no href.
#
# `book` is the link text minus its first character, with '[锁]' ("lock")
# appended when the row contains a color="gray" element, followed by
# "(<progress>)" when the row has a fifth <td>.
sub parse_writer_book_info {
    my ( $self, $tr, $series ) = @_;

    my $link = $tr->look_down( '_tag', 'a' );
    return unless $link;

    # A link without an href cannot yield a usable book URL.
    my $href = $link->attr( 'href' );
    return unless defined $href;

    my $bookname = $link->as_trimmed_text;
    $bookname = '' unless defined $bookname;
    substr( $bookname, 0, 1, '' );    # drop the first character of the link text
    $bookname .= '[锁]' if $tr->look_down( 'color', 'gray' );

    # Progress comes from the fifth cell. Rows with fewer cells (this sub is
    # called for every matched row) used to die on an undef method call; they
    # now simply get no progress suffix.
    my @cells    = $tr->look_down( '_tag', 'td' );
    my $progress = $cells[4] ? $cells[4]->as_trimmed_text : undef;
    my $label    = defined $progress ? "$bookname($progress)" : $bookname;

    return {
        series => $series,
        book   => $label,
        url    => $self->base_url() . "/$href",
    };
} ## end sub parse_writer_book_info
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Build the search URL for a keyword query.
#
#   $keyword         - search text; passed through $self->encode_cjk_for_url()
#   $opt{query_type} - one of the keys of %code_of below; defaults to '作品'
#
# Returns the URL string base_url() . "/search.php?kw=<keyword>&t=<code>".
# An unrecognised query_type falls back to the default instead of producing a
# URL with an empty "t=" parameter.
sub make_query_request {

    my ( $self, $keyword, %opt ) = @_;

    my $default_type = '作品';

    # Query type label => value of the "t" request parameter.
    my %code_of = (
        '作品' => '1',
        '作者' => '2',
        '主角' => '4',
        '配角' => '5',
        '其他' => '6',
    );

    my $type = $opt{query_type} || $default_type;
    $type = $default_type unless exists $code_of{$type};

    my $encoded = $self->encode_cjk_for_url( $keyword );

    return $self->base_url() . qq[/search.php?kw=$encoded&t=$code_of{$type}];
} ## end sub make_query_request
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# Collect the result-page URLs from the pager of a search result page.
#
#   $h - page content handed straight to Web::Scraper's scrape()
#
# Returns an arrayref of URLs (base_url() . href, encoded with
# $self->charset()), or an empty arrayref when the scrape yields none. Only
# pager links whose text has the form "[<digits>]" are kept.
sub parse_query_list {
    my ( $self, $h ) = @_;

    my $pager = scraper {
        process '//div[@class="page"]/a', 'urls[]' => sub {
            my ( $link ) = @_;
            return unless $link->as_text =~ /^\[\d*\]$/;

            my $page_url = $self->base_url() . $link->attr( 'href' );
            return encode( $self->charset(), $page_url );
        };
    };

    my $found = $pager->scrape( $h )->{urls};
    return $found ? $found : [];
} ## end sub parse_query_list
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# Parse one search result page into a list of book records.
#
#   $h - page content handed straight to Web::Scraper's scrape()
#
# Returns an arrayref of hashrefs with keys book, url, writer and progress.
# The n-th <h3 class="title"> link is paired with the n-th <div class="info">.
# Books without a url are skipped. When progress is known it is appended to
# the book name as "(<progress>)", the same format parse_writer_book_info()
# uses.
sub parse_query_item {
    my ( $self, $h ) = @_;

    my $parse_query = scraper {
        process '//h3[@class="title"]/a',
            'books[]' => {
            'book' => 'TEXT',
            'url'  => '@href',
            };

        process '//div[@class="info"]', 'writers[]' => sub {
            my ( $writer, $progress ) = $_[0]->as_text =~ /作者:(.+?) \┃ 进度:(\S+)/s;
            return { writer => $writer, progress => $progress };
        };
    };
    my $ref = $parse_query->scrape( $h );

    my $books   = $ref->{books}   || [];
    my $writers = $ref->{writers} || [];

    my @result;
    for my $i ( 0 .. $#{$books} ) {
        my $r = $books->[$i];
        next unless ( $r->{url} );

        # The info block may be missing, or its text may not match the
        # pattern; treat both as "no writer data" rather than warning.
        my $w = $writers->[$i] || {};

        # The suffix used to be appended to a `title` key the books rule
        # never sets, leaving `book` undecorated; it now goes on `book`.
        $r->{book} .= "($w->{progress})" if defined $w->{progress};

        push @result, { %$w, %$r };
    }

    return \@result;
} ## end sub parse_query_item
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
1; |