line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Sweat::Article; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
25
|
use warnings; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
115
|
|
4
|
3
|
|
|
3
|
|
20
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
86
|
|
5
|
3
|
|
|
3
|
|
19
|
use Moo; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
18
|
|
6
|
3
|
|
|
3
|
|
7554
|
use namespace::clean; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
22
|
|
7
|
3
|
|
|
3
|
|
719
|
use utf8::all; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
36
|
|
8
|
|
|
|
|
|
|
|
9
|
3
|
|
|
3
|
|
4859
|
use Types::Standard qw( Str Maybe ); |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
32
|
|
10
|
|
|
|
|
|
|
|
11
|
3
|
|
|
3
|
|
2242
|
use Scalar::Util qw( blessed ); |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
228
|
|
12
|
3
|
|
|
3
|
|
1826
|
use HTML::Strip; |
|
3
|
|
|
|
|
23599
|
|
|
3
|
|
|
|
|
150
|
|
13
|
3
|
|
|
3
|
|
24
|
use List::Util qw( shuffle ); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
201
|
|
14
|
3
|
|
|
3
|
|
23
|
use MediaWiki::API; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
4956
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# Read-only, mandatory string attributes: the article's readable body text
# and its headline.
has [ qw( text title ) ] => (
    isa      => Str,
    required => 1,
    is       => 'ro',
);

# Read-only, mandatory link to the article. No type constraint is applied,
# so any value (including a URI object) is accepted as-is.
has url => (
    required => 1,
    is       => 'ro',
);
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Shared HTML-to-text converter, used to strip markup from article extracts.
our $stripper = HTML::Strip->new();

# Shared MediaWiki client through which every Wikipedia query in this
# package is sent.
our $mw = MediaWiki::API->new();

# Tune the client: lag settings go into its config hash, and the underlying
# user agent gets a timeout of 10.
# NOTE(review): presumably max_lag is the tolerated server lag and
# max_lag_delay the pause before a retry, both in seconds -- confirm against
# the MediaWiki::API documentation.
@{ $mw->{config} }{ qw( max_lag max_lag_delay ) } = ( 5, 1 );
$mw->{ua}->timeout(10);

# Wikipedia language subdomain, interpolated into article URLs. It is only
# declared here; presumably the caller assigns it before articles are built.
our $language;

# Titles already handed out as linked articles, so they are not picked again.
our %seen_titles;
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Alternate constructor: build an article from a Web::NewsAPI::Article.
#
# Arguments:
#   $class           - class to instantiate (call this as a class method).
#   $newsapi_article - an object that isa Web::NewsAPI::Article.
#
# Returns the object produced by $class->new, with:
#   text  - the source title and description joined by ". "
#   url   - the source article's url, passed through untouched
#   title - the source title
#
# A missing (undef) title or description is treated as an empty string, both
# in the text and in the title attribute.
#
# Dies if $newsapi_article is not a blessed Web::NewsAPI::Article.
sub new_from_newsapi_article {
    my ( $class, $newsapi_article ) = @_;

    unless ( blessed($newsapi_article)
        && $newsapi_article->isa('Web::NewsAPI::Article') )
    {
        # Spell out undef so that building the message never triggers an
        # "uninitialized value" warning of its own.
        my $got = $newsapi_article // 'undef';
        die "Expected a NewsAPI article, got $got";
    }

    # Normalize once, so the text and the title attribute always agree on
    # how an absent title is represented.
    my $title       = $newsapi_article->title       // q{};
    my $description = $newsapi_article->description // q{};

    return $class->new(
        text  => $title . q{. } . $description,
        url   => $newsapi_article->url,
        title => $title,
    );
}
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Alternate constructor: pick a random Wikipedia title and build an article
# from it by delegating to new_from_wikipedia_title.
#
# Arguments:
#   $class - class to instantiate (call this as a class method).
#
# Returns whatever new_from_wikipedia_title returns for the chosen title.
sub new_from_random_wikipedia_article {
    my $class = shift;

    return $class->new_from_wikipedia_title( scalar _get_random_title() );
}
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Alternate constructor: build an article from a Wikipedia page chosen via
# the title of an existing article.
#
# Arguments:
#   $class   - class to instantiate (call this as a class method).
#   $article - an object with a title method; that title seeds the lookup.
#
# Returns whatever new_from_wikipedia_title returns for the title supplied
# by _get_random_title_linked_from_title.
sub new_from_linked_wikipedia_article {
    my $class          = shift;
    my $source_article = shift;

    my $linked_title
        = _get_random_title_linked_from_title( $source_article->title );

    return $class->new_from_wikipedia_title($linked_title);
}
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Alternate constructor: build an article from the Wikipedia page with the
# given title, wandering to other pages if that one has no usable summary.
#
# Arguments:
#   $class - class to instantiate (call this as a class method).
#   $title - title of the Wikipedia page to start from.
#
# Search order, stopping at the first title whose summary is usable:
#   1. the given title;
#   2. up to three hops, each to a title obtained from
#      _get_random_title_linked_from_title for the previous title;
#   3. up to three titles obtained from _get_random_title.
#
# Returns the object produced by $class->new with the winning title, its
# summary as text, and a URL on the $language Wikipedia.
#
# Dies if no title with a usable summary turns up, rather than passing an
# undefined text on to the constructor.
sub new_from_wikipedia_title {
    my ($class, $title) = @_;

    my $max_linked_hops  = 3;
    my $max_random_tries = 3;

    my $summary = _get_summary_for_title($title);

    # Follow links away from a summary-less page before giving up on it.
    for ( 1 .. $max_linked_hops ) {
        last if $summary;
        $title   = _get_random_title_linked_from_title($title);
        $summary = _get_summary_for_title($title);
    }

    # Still nothing: fall back to unrelated random pages. A single random
    # page can lack a summary too, so allow a few attempts.
    for ( 1 .. $max_random_tries ) {
        last if $summary;
        $title   = _get_random_title();
        $summary = _get_summary_for_title($title);
    }

    die "Could not find a Wikipedia article with a usable summary"
        unless $summary;

    # NOTE(review): the title is interpolated into the URL verbatim (spaces
    # and other reserved characters are not escaped), and $language must
    # have been set elsewhere before this runs -- confirm both with callers.
    return $class->new(
        title => $title,
        text  => $summary,
        url   => "https://$language.wikipedia.org/wiki/$title",
    );
}
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Ask the shared MediaWiki client for one random page and return its title.
#
# Takes no arguments. The query restricts results to namespace 0.
#
# Returns the title of the first entry in the response's random list, or
# undef when the response carries no such entry.
sub _get_random_title {
    my %query = (
        action      => 'query',
        list        => 'random',
        rnnamespace => 0,
    );

    my $response = $mw->api( \%query );

    return $response->{query}{random}[0]{title};
}
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Fetch the extract of the Wikipedia page with the given title and tidy it
# into plain text suitable for reading out loud.
#
# Arguments:
#   $title - title of the page to look up.
#
# Returns the cleaned summary, or undef when the lookup yields no extract
# or the cleaned text is empty, false, or whitespace only.
sub _get_summary_for_title {
    my ($title) = @_;

    # NOTE(review): exintro is presumably a presence-only flag limiting the
    # extract to the page's introduction -- confirm against the MediaWiki
    # TextExtracts API documentation.
    my $result = $mw->api( {
        action  => 'query',
        prop    => 'extracts',
        exintro => undef,
        titles  => $title,
    } );

    # Walk the response one level at a time: a failed call or a response
    # without pages must end in "no summary", not in a dereference error.
    my $pages
        = ( ref($result) eq 'HASH' && ref( $result->{query} ) eq 'HASH' )
        ? $result->{query}{pages}
        : undef;
    my ($page)  = ref($pages) eq 'HASH' ? values %{$pages} : ();
    my $summary = ref($page) eq 'HASH'  ? $page->{extract} : undef;

    return undef unless defined $summary;

    $summary = $stripper->parse($summary);

    # The stripper is shared across calls; reset it so that parser state
    # left over from this extract cannot leak into the next one.
    $stripper->eof;

    return undef unless defined $summary;

    # Eliminate all parentheticals (birth/death dates, alternate-language
    # representations, and so on) because they don't read out loud well.
    # Each pass removes only innermost pairs, so repeat until none remain.
    1 while $summary =~ s/\([^\(]*?\)//g;

    # Clean up redundant whitespace, and whitespace before punctuation.
    $summary =~ s/\s+([,.!?;:])/$1/g;
    $summary =~ s/ {2,}/ /g;

    return undef unless $summary && $summary =~ /\S/;
    return $summary;
}
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# Remove every delimited aside, including nested ones, from a string.
#
# Arguments:
#   $summary - text to clean; undef is passed straight back.
#   $opener  - opening delimiter, a single character; defaults to "(".
#   $closer  - closing delimiter, a single character; defaults to ")".
#
# Returns the text with every opener...closer span deleted. Whitespace
# around a removed span is left untouched.
sub _erase_parentheticals {
    my ( $summary, $opener, $closer ) = @_;

    return $summary unless defined $summary;

    $opener = '(' unless defined $opener && length $opener;
    $closer = ')' unless defined $closer && length $closer;

    # Matches a pair with no further opener inside it, i.e. an innermost
    # pair. The delimiters are quoted so regex metacharacters stay literal.
    my $innermost_pair = qr/\Q$opener\E[^\Q$opener\E]*?\Q$closer\E/;

    # Removing innermost pairs exposes the pairs that enclosed them, so keep
    # going until a pass finds nothing left to remove.
    1 while $summary =~ s/$innermost_pair//g;

    return $summary;
}
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Pick, at random, the title of a page that the given page links to.
#
# Arguments:
#   $title - title of the page whose outgoing links are considered.
#
# Returns an acceptable linked title, recording it in %seen_titles so it is
# not chosen again. When the lookup fails, the page has no links, or no
# link is acceptable, returns the result of _get_random_title instead.
sub _get_random_title_linked_from_title {
    my ($title) = @_;

    # Only namespace-0 links are requested, at most 100 of them.
    my $result = $mw->api( {
        action      => 'query',
        prop        => 'links',
        titles      => $title,
        plnamespace => 0,
        pllimit     => 100,
    } );

    # Walk the response one level at a time: a failed call, or a page
    # without a links list, must lead to the random fallback below rather
    # than to a dereference error.
    my $pages
        = ( ref($result) eq 'HASH' && ref( $result->{query} ) eq 'HASH' )
        ? $result->{query}{pages}
        : undef;
    my ($page)    = ref($pages) eq 'HASH' ? values %{$pages} : ();
    my $links_ref = ref($page) eq 'HASH'  ? $page->{links}   : undef;
    my @links = ref($links_ref) eq 'ARRAY' ? shuffle( @{$links_ref} ) : ();

    for my $link (@links) {
        next unless ref($link) eq 'HASH';

        my $proposed_title = $link->{title};
        next unless defined $proposed_title && length $proposed_title;

        # Skip:
        # * Any title we've already seen
        # * Any title with a numeral in it (to stay away from annual-
        #   statistics gravity wells)
        # * Any title with a word suggesting it's just a list or table
        next if $seen_titles{$proposed_title};
        next
            if $proposed_title
            =~ /\d|^list of\s|^comparison of\s|^table of\s/i;

        $seen_titles{$proposed_title} = 1;
        return $proposed_title;
    }

    # Nothing acceptable among the links.
    return _get_random_title();
}
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head1 NAME

Sweat::Article - Library for the `sweat` command-line program
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 DESCRIPTION |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
This library is intended for internal use by the L<sweat> command-line program, |
203
|
|
|
|
|
|
|
and as such offers no publicly documented methods. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head1 SEE ALSO |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
L<sweat> |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 AUTHOR |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Jason McIntosh <jmac@jmac.org> |