line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ABSTRACT: https://www.jjwxc.net |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=pod |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=encoding utf8 |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 FUNCTION |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head2 make_query_request |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# $type is one of: 作品 (work title, the default), 作者 (author), 主角 (main character), 配角 (supporting character), 其他 (other) |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$parser->make_query_request( $keyword, query_type => $type ); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
package Novel::Robot::Parser::jjwxc; |
18
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
19
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
23
|
|
20
|
1
|
|
|
1
|
|
5
|
use utf8; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
21
|
1
|
|
|
1
|
|
23
|
use base 'Novel::Robot::Parser'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
109
|
|
22
|
|
|
|
|
|
|
|
23
|
1
|
|
|
1
|
|
7
|
use Web::Scraper; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
12
|
|
24
|
1
|
|
|
1
|
|
103
|
use Encode; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
115
|
|
25
|
1
|
|
|
1
|
|
7
|
use Data::Dumper; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1829
|
|
26
|
|
|
|
|
|
|
|
27
|
0
|
|
|
0
|
0
|
0
|
# Root URL of the desktop site. Takes no meaningful arguments and always
# returns the same string.
sub base_url {
    return 'https://www.jjwxc.net';
}
28
|
|
|
|
|
|
|
|
29
|
0
|
|
|
0
|
0
|
0
|
# Bare domain name of the site handled by this parser. Always returns the
# same string.
sub domain {
    return 'jjwxc.net';
}
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Map a novel index URL onto the mobile site's whole-book page.
#
#   $index_url - URL that may carry a "novelid=<digits>" parameter.
#
# Returns "https://m.jjwxc.net/book2/<novelid>?more=0&whole=1" when a novel id
# is found in $index_url; otherwise returns $index_url unchanged (including
# undef when no URL was given).
sub generate_novel_url {
    my ( $self, $index_url ) = @_;

    # Nothing to inspect: hand the input straight back instead of triggering
    # an "uninitialized value" warning in the match below.
    return $index_url unless defined $index_url;

    my ($novelid) = $index_url =~ m#novelid=(\d+)#;

    # Test definedness, not truth, so that an id of "0" is still mapped.
    return $index_url unless defined $novelid;

    return "https://m.jjwxc.net/book2/$novelid?more=0&whole=1";
}
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# parse_novel( $self, $h ): build the novel index from a page's HTML.
#   $h - reference to the HTML string. It is modified in place: everything
#        from "本书霸王票读者排行" to the end of the string is removed first.
# Returns a hashref with the keys
#   book, writer - the two captures of the 《...》..._晋江文学城 match
#   item_list    - arrayref of { id, title, url } chapter entries; id counts
#                  from 1 and url is prefixed with https://m.jjwxc.net
# NOTE(review): this listing is a coverage-report dump (count cells and "|"
# separators are interleaved with the code), and the HTML tags inside several
# regex literals below appear to have been stripped during extraction. Restore
# those patterns from the original module before relying on this code.
sub parse_novel { |
39
|
1
|
|
|
1
|
0
|
4
|
my ( $self, $h ) = @_; |
40
|
1
|
|
|
|
|
129
|
$$h =~ s#本书霸王票读者排行.*##s; |
41
|
|
|
|
|
|
|
|
42
|
1
|
|
|
|
|
3
|
my %r; |
43
|
1
|
|
|
|
|
14
|
# NOTE(review): presumably a tag (e.g. the page title's opening tag) preceded \s* — confirm.
( $r{book}, $r{writer} ) = $$h =~ m#\s*《(.+?)》(.+?)_晋江文学城#s; |
44
|
|
|
|
|
|
|
|
45
|
1
|
|
|
|
|
47
|
# NOTE(review): pattern is truncated (unbalanced "("); presumably it captured
# the chapter-list markup that follows "章节列表: " — confirm.
my ( $cc ) = $$h =~ m#章节列表: .+?(#s; |
46
|
1
|
|
|
|
|
48
|
# @f is consumed below as alternating captures: even index = link path,
# odd index = chapter title.
# NOTE(review): as shown the pattern has a single group; presumably two groups
# around link markup were lost — confirm.
my @f = $cc =~ m#(.+?)#sg; |
47
|
1
|
|
|
|
|
6
|
# Two captures per chapter, so the chapter count is half the capture count.
my $max_chapter_num = ( $#f + 1 ) / 2; |
48
|
1
|
|
|
|
|
5
|
for my $i ( 1 .. $max_chapter_num ) { |
49
|
10
|
|
|
|
|
19
|
# Index of this chapter's title capture.
my $j = 2 * $i - 1; |
50
|
10
|
|
|
|
|
16
|
my $t = $f[$j]; |
51
|
10
|
|
|
|
|
51
|
# Strip a leading "<digits>." chapter number from the title.
# NOTE(review): the blank inside the group here and in the next substitution
# was probably an &nbsp; entity before extraction — confirm.
$t =~ s/^\d+\.( )*//; |
52
|
10
|
|
|
|
|
46
|
$t =~ s/ / /g; |
53
|
10
|
|
|
|
|
56
|
# Remove a leading run of text that ends in ">" (presumably leftover markup).
$t =~ s/^.+>//; |
54
|
10
|
|
|
|
|
57
|
# Collapse every run of whitespace into a single space.
$t =~ s/\s+/ /g; |
55
|
|
|
|
|
|
|
|
56
|
10
|
|
|
|
|
18
|
# Index of this chapter's link capture (the element just before the title).
my $ui = 2 * $i - 2; |
57
|
10
|
|
|
|
|
20
|
my $u = "https://m.jjwxc.net$f[$ui]"; |
58
|
10
|
|
|
|
|
15
|
push @{ $r{item_list} }, { id => $i, title => $t, url => $u }; |

10
|
|
|
|
|
41
|
|
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
1
|
|
|
|
|
9
|
return \%r; |
62
|
|
|
|
|
|
|
} ## end sub parse_novel |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# parse_novel_item( $self, $h ): extract one chapter's text from the HTML
# string referenced by $h.
# Returns { content => ... }; content is '' when the pattern does not match
# or the captured text is false.
# NOTE(review): this listing is a coverage-report dump (count cells and "|"
# separators are interleaved with the code).
sub parse_novel_item { |
65
|
1
|
|
|
|
1
|
0
|
4
|
my ( $self, $h ) = @_; |
66
|
|
|
|
|
|
|
|
67
|
1
|
|
|
|
|
174
|
# NOTE(review): the tag names in this pattern appear to have been stripped
# during extraction (it begins with a bare "]+>"); presumably it matched an
# opening tag, skipped ahead to a second tag and captured that tag's content —
# confirm against the original module.
my ( $c ) = $$h =~ m#]+>.+?]*>(.+?)#s; |
68
|
|
|
|
|
|
|
|
69
|
1
|
|
|
50
|
|
|
|
18
|
|
return { content => $c || '' }; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Extract the writer name from a board page.
#
#   $h - page content, passed unchanged to the scraper's scrape() method.
#
# Returns { writer => $name }, where $name comes from the text of the first
# <b> element inside a <tr valign="bottom">.
sub parse_board {
    my ( $self, $h ) = @_;

    my $writer_scraper = scraper {
        process_first '//tr[@valign="bottom"]//b', writer => 'TEXT';
    };
    my $ref = $writer_scraper->scrape($h);

    # tidy_string is not defined in this file; it is handed the result hash
    # and the key to clean (presumably it rewrites $ref->{writer} in place).
    $self->tidy_string( $ref, 'writer' );

    return { writer => $ref->{writer} };
}
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Collect the books listed on a writer's board page.
#
#   $h - page content, passed unchanged to the scraper's scrape() method.
#
# Every <tr bgcolor="#eefaee"> row is visited in document order. A row can
# switch the current series (see parse_writer_series_name) and/or describe a
# book (see parse_writer_book_info). Only books whose url contains "onebook"
# are kept.
#
# Returns an arrayref of book hashes (series, book, url, writer).
sub parse_board_item {
    my ( $self, $h ) = @_;

    my @book_list;

    # Series label used until a series header row is seen ("uncategorized").
    my $series = '未分类';

    my $board_scraper = scraper {

        # The writer name has to be scraped here as well; without this rule
        # $ref->{writer} is never set and every book would get an undefined
        # writer. The XPath is the one parse_board uses for the writer name.
        process_first '//tr[@valign="bottom"]//b', writer => 'TEXT';

        process '//tr[@bgcolor="#eefaee"]', 'book_list[]' => sub {
            my ($tr) = @_;

            # $series is carried from row to row through the closure.
            $series = $self->parse_writer_series_name( $tr, $series );

            my $book = $self->parse_writer_book_info( $tr, $series );
            push @book_list, $book if $book and $book->{url} =~ /onebook/;

            # Results are gathered in @book_list, not in the scraper's stash.
            return;
        };
    };

    my $ref = $board_scraper->scrape($h);

    # tidy_string is not defined in this file; presumably it cleans
    # $ref->{writer} in place.
    $self->tidy_string( $ref, 'writer' );
    $_->{writer} = $ref->{writer} for @book_list;

    return \@book_list;
}
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Work out which series the current table row belongs to.
#
#   $tr     - row element; must support look_down() and as_trimmed_text().
#   $series - series name in effect before this row.
#
# A row containing an element with colspan="7" is treated as a series header:
# when its text contains 【name】 the name between the brackets is returned.
# In every other case the incoming $series is returned unchanged.
sub parse_writer_series_name {
    my ( $self, $tr, $series ) = @_;

    # Not a header row: the series does not change.
    return $series unless $tr->look_down( 'colspan', '7' );

    # List-context match: $name stays undefined when the brackets are absent.
    my ($name) = $tr->as_trimmed_text =~ /【(.*)】/;

    return defined $name ? $name : $series;
}
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Describe the book found in one row of a writer's book table.
#
#   $tr     - row element; must support look_down().
#   $series - series name to record for the book.
#
# Returns nothing when the row has no <a> element or the link has no href.
# Otherwise returns a hashref:
#   series - $series as given
#   book   - link text without its first character, plus "[锁]" when the row
#            contains an element with color="gray", followed by "(progress)"
#   url    - base_url() . "/" . href
# The progress text is read from the fifth <td> of the row; it is empty when
# the row has fewer than five cells.
sub parse_writer_book_info {
    my ( $self, $tr, $series ) = @_;

    my $link = $tr->look_down( '_tag', 'a' );
    return unless $link;

    # A link without href cannot produce a usable book url.
    my $book_url = $link->attr('href');
    return unless defined $book_url;

    my $bookname = $link->as_trimmed_text;

    # Drop the first character of the link text (presumably a decoration
    # that precedes the real title - confirm against the page markup).
    substr( $bookname, 0, 1, '' );

    # Gray colouring marks the book with a "[锁]" ("locked") suffix.
    $bookname .= '[锁]' if $tr->look_down( 'color', 'gray' );

    # Guard the cell lookup: calling a method on a missing fifth cell would
    # abort the whole page parse.
    my @cells = $tr->look_down( '_tag', 'td' );
    my $progress = $cells[4] ? $cells[4]->as_trimmed_text : '';

    return {
        series => $series,
        book   => "$bookname($progress)",
        url    => $self->base_url() . "/$book_url",
    };
}
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Build the search URL for a keyword query.
#
#   $keyword         - search text; it is passed through
#                      $self->encode_cjk_for_url before being placed in the URL.
#   %opt             - named options:
#     query_type     - what to search by: 作品 (work title, the default),
#                      作者 (author), 主角 (main character),
#                      配角 (supporting character) or 其他 (other).
#
# An unrecognised query_type produces a warning and falls back to the default
# instead of emitting a URL with an empty "t=" parameter.
#
# Returns base_url() . "/search.php?kw=<keyword>&t=<type code>".
sub make_query_request {
    my ( $self, $keyword, %opt ) = @_;
    $opt{query_type} ||= '作品';

    # Value of the "t" request parameter for each supported query type.
    my %code_for = (
        '作品' => '1',
        '作者' => '2',
        '主角' => '4',
        '配角' => '5',
        '其他' => '6',
    );

    my $type_code = $code_for{ $opt{query_type} };
    if ( !defined $type_code ) {
        warn "make_query_request: unknown query_type, searching by work title instead\n";
        $type_code = $code_for{'作品'};
    }

    $keyword = $self->encode_cjk_for_url($keyword);

    return $self->base_url() . qq[/search.php?kw=$keyword&t=$type_code];
}
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Collect the result-page URLs from a search result page's pager.
#
#   $h - page content, passed unchanged to the scraper's scrape() method.
#
# Only <a> elements directly inside <div class="page"> whose text has the
# form "[<digits>]" are used. Each href is prefixed with base_url() and
# encoded with $self->charset().
#
# Returns an arrayref of encoded URLs (empty when the page has no pager).
sub parse_query_list {
    my ( $self, $h ) = @_;

    my $pager_scraper = scraper {
        process '//div[@class="page"]/a', 'urls[]' => sub {
            my ($link) = @_;

            # Skip pager links that are not bracketed page numbers.
            return unless $link->as_text =~ /^\[\d*\]$/;

            # A link without href has nothing to contribute.
            my $href = $link->attr('href');
            return unless defined $href;

            return encode( $self->charset(), $self->base_url() . $href );
        };
    };

    my $r = $pager_scraper->scrape($h);

    return $r->{urls} || [];
}
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Extract the books listed on one search result page.
#
#   $h - page content, passed unchanged to the scraper's scrape() method.
#
# Book links (<h3 class="title">/<a>) and info blocks (<div class="info">) are
# scraped separately and paired by position. Books without a url are skipped.
#
# Returns an arrayref of hashes with the keys
#   book, url        - link text and href of the result
#   writer, progress - captured from the info block (may be undefined when the
#                      block is missing or does not match)
#   title            - "<book>(<progress>)"
sub parse_query_item {
    my ( $self, $h ) = @_;

    my $result_scraper = scraper {
        process '//h3[@class="title"]/a',
          'books[]' => {
            'book' => 'TEXT',
            'url'  => '@href',
          };

        process '//div[@class="info"]', 'writers[]' => sub {
            my ($info) = @_;
            my ( $writer, $progress ) = $info->as_text =~ /作者:(.+?) \┃ 进度:(\S+)/s;
            return { writer => $writer, progress => $progress };
        };
    };
    my $ref = $result_scraper->scrape($h);

    my $books   = $ref->{books}   || [];
    my $writers = $ref->{writers} || [];

    my @result;
    for my $i ( 0 .. $#{$books} ) {
        my $r = $books->[$i];
        next unless $r->{url};

        # The info list can be shorter than the book list; fall back to an
        # empty record instead of dereferencing undef.
        my $w = $writers->[$i] || {};

        # The scraper stores the link text under "book" and never sets
        # "title", so build the title from the book name rather than
        # appending the progress to an empty value.
        my $book     = defined $r->{book}     ? $r->{book}     : '';
        my $progress = defined $w->{progress} ? $w->{progress} : '';
        $r->{title} = "$book($progress)";

        # Book fields win over writer fields on key collisions.
        push @result, { %$w, %$r };
    }

    return \@result;
}
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
1; |