line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::KGS::GameArchives; |
2
|
3
|
|
|
3
|
|
208911
|
use 5.008_009; |
|
3
|
|
|
|
|
13
|
|
|
3
|
|
|
|
|
249
|
|
3
|
3
|
|
|
3
|
|
21
|
use strict; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
104
|
|
4
|
3
|
|
|
3
|
|
16
|
use warnings; |
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
110
|
|
5
|
3
|
|
|
3
|
|
8133
|
use URI; |
|
3
|
|
|
|
|
67765
|
|
|
3
|
|
|
|
|
109
|
|
6
|
3
|
|
|
3
|
|
3691
|
use Web::Scraper; |
|
3
|
|
|
|
|
520613
|
|
|
3
|
|
|
|
|
22
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
sub new { |
11
|
2
|
|
|
2
|
0
|
18270
|
my $class = shift; |
12
|
2
|
50
|
|
|
|
15
|
my %args = @_ == 1 ? %{$_[0]} : @_; |
|
0
|
|
|
|
|
0
|
|
13
|
2
|
|
|
|
|
11
|
bless \%args, $class; |
14
|
|
|
|
|
|
|
} |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub base_uri { |
17
|
4
|
|
66
|
4
|
1
|
16999
|
$_[0]->{base_uri} ||= URI->new('http://www.gokgs.com/gameArchives.jsp'); |
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
sub user_agent { |
21
|
0
|
|
|
0
|
1
|
0
|
$_[0]->{user_agent}; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub has_user_agent { |
25
|
2
|
|
|
2
|
0
|
16
|
exists $_[0]->{user_agent}; |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub _scraper { |
29
|
3
|
|
|
3
|
|
546
|
my $self = shift; |
30
|
3
|
|
66
|
|
|
34
|
$self->{scraper} ||= $self->_build_scraper; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub _build_scraper { |
34
|
2
|
|
|
2
|
|
5
|
my $self = shift; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $scraper = scraper { |
37
|
2
|
|
|
2
|
|
57256
|
process 'h2', 'summary' => 'TEXT'; |
38
|
|
|
|
|
|
|
process '//table[tr/th/text()="Viewable?"]//following-sibling::tr', 'games[]' => scraper { |
39
|
2
|
|
|
|
|
67113
|
process '//a[contains(@href,".sgf")]', 'kifu_uri' => '@href'; |
40
|
2
|
|
|
|
|
30380
|
process '//td[2]//a', 'white[]' => { name => 'TEXT', link => '@href' }; |
41
|
2
|
|
|
|
|
7778
|
process '//td[3]//a', 'black[]' => { name => 'TEXT', link => '@href' }; |
42
|
2
|
|
|
|
|
7643
|
process '//td[3]', 'maybe_setup' => 'TEXT'; |
43
|
2
|
|
|
|
|
6364
|
process '//td[4]', 'setup' => 'TEXT'; |
44
|
2
|
|
|
|
|
6110
|
process '//td[5]', 'start_time' => 'TEXT'; |
45
|
2
|
|
|
|
|
6157
|
process '//td[6]', 'type' => 'TEXT'; |
46
|
2
|
|
|
|
|
6333
|
process '//td[7]', 'result' => 'TEXT'; |
47
|
2
|
|
|
|
|
6462
|
process '//td[8]', 'tag' => 'TEXT'; |
48
|
2
|
|
|
|
|
12663
|
}; |
49
|
2
|
|
|
|
|
24792
|
process '//a[contains(@href,".zip")]', 'zip_uri' => '@href'; |
50
|
2
|
|
|
|
|
39931
|
process '//a[contains(@href,".tar.gz")]', 'tgz_uri' => '@href'; |
51
|
|
|
|
|
|
|
process '//table[descendant::tr/th/text()="Year"]//following-sibling::tr', 'calendar[]' => scraper { |
52
|
2
|
|
|
|
|
57897
|
process 'td', 'year' => 'TEXT'; |
53
|
|
|
|
|
|
|
process qq{//following-sibling::td[text()!="\x{a0}"]}, 'month[]' => scraper { |
54
|
3
|
|
|
|
|
33560
|
process '.', 'name' => 'TEXT'; |
55
|
3
|
|
|
|
|
14264
|
process 'a', 'link' => '@href'; |
56
|
2
|
|
|
|
|
4231
|
}; |
57
|
2
|
|
|
|
|
63250
|
}; |
58
|
2
|
|
|
|
|
23
|
}; |
59
|
|
|
|
|
|
|
|
60
|
2
|
50
|
|
|
|
28
|
$scraper->user_agent( $self->user_agent ) if $self->has_user_agent; |
61
|
|
|
|
|
|
|
|
62
|
2
|
|
|
|
|
28
|
$scraper; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub scrape { |
66
|
2
|
|
|
2
|
1
|
8364
|
my $self = shift; |
67
|
2
|
|
|
|
|
9
|
my $result = $self->_scraper->scrape( @_ ); |
68
|
2
|
|
|
|
|
7649
|
my $games = $result->{games}; |
69
|
2
|
|
|
|
|
8
|
my $calendar = $result->{calendar}; |
70
|
|
|
|
|
|
|
|
71
|
2
|
50
|
|
|
|
11
|
return $result unless $calendar; |
72
|
|
|
|
|
|
|
|
73
|
2
|
|
|
|
|
5
|
my @calendar; |
74
|
2
|
|
|
|
|
7
|
for my $c ( @$calendar ) { |
75
|
2
|
|
|
|
|
4
|
for my $month ( @{$c->{month}} ) { |
|
2
|
|
|
|
|
5
|
|
76
|
3
|
|
|
|
|
12
|
$month->{year} = $c->{year}; |
77
|
3
|
|
|
|
|
9
|
$month->{month} = delete $month->{name}; # rename |
78
|
3
|
|
|
|
|
13
|
push @calendar, $month; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
2
|
100
|
66
|
|
|
21
|
if ( @calendar == 1 and $calendar[0]{year} == 1970 ) { # KGS's bug |
83
|
1
|
|
|
|
|
3
|
delete $result->{calendar}; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
else { |
86
|
1
|
|
|
|
|
4
|
$result->{calendar} = \@calendar; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
2
|
100
|
|
|
|
19
|
return $result unless $games; |
90
|
|
|
|
|
|
|
|
91
|
1
|
|
|
|
|
4
|
for my $game ( @$games ) { |
92
|
2
|
|
|
|
|
6
|
my $maybe_setup = delete $game->{maybe_setup}; |
93
|
2
|
50
|
|
|
|
10
|
next if exists $game->{black}; |
94
|
0
|
|
|
|
|
0
|
my $users = delete $game->{white}; # | |
95
|
0
|
0
|
|
|
|
0
|
if ( @$users == 1 ) { # Type: Demonstration |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
96
|
0
|
|
|
|
|
0
|
$game->{editor} = $users->[0]; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
elsif ( @$users == 3 ) { # Type: Review |
99
|
0
|
|
|
|
|
0
|
$game->{editor} = $users->[0]; |
100
|
0
|
|
|
|
|
0
|
$game->{white} = [ $users->[1] ]; |
101
|
0
|
|
|
|
|
0
|
$game->{black} = [ $users->[2] ]; |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
elsif ( @$users == 5 ) { # Type: Rengo Review |
104
|
0
|
|
|
|
|
0
|
$game->{editor} = $users->[0]; |
105
|
0
|
|
|
|
|
0
|
$game->{white} = [ @{$users}[1,2] ]; |
|
0
|
|
|
|
|
0
|
|
106
|
0
|
|
|
|
|
0
|
$game->{black} = [ @{$users}[3,4] ]; |
|
0
|
|
|
|
|
0
|
|
107
|
|
|
|
|
|
|
} |
108
|
0
|
0
|
|
|
|
0
|
$game->{tag} = delete $game->{result} if exists $game->{result}; |
109
|
0
|
|
|
|
|
0
|
$game->{result} = delete $game->{type}; |
110
|
0
|
|
|
|
|
0
|
$game->{type} = delete $game->{start_time}; |
111
|
0
|
|
|
|
|
0
|
$game->{start_time} = delete $game->{setup}; |
112
|
0
|
|
|
|
|
0
|
$game->{setup} = $maybe_setup; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
1
|
|
|
|
|
4
|
@$games = reverse @$games; # sort by Start Time in descending order |
116
|
|
|
|
|
|
|
|
117
|
1
|
|
|
|
|
7
|
$result; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub query { |
121
|
0
|
|
|
0
|
1
|
|
my ( $self, @query ) = @_; |
122
|
0
|
|
|
|
|
|
my $uri = $self->base_uri->clone; |
123
|
0
|
|
|
|
|
|
$uri->query_form( @query ); |
124
|
0
|
|
|
|
|
|
$self->scrape( $uri ); |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
1; |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
__END__ |