line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Importer::MWTemplates; |
2
|
2
|
|
|
2
|
|
16271
|
use v5.14; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
72
|
|
3
|
|
|
|
|
|
|
|
4
|
2
|
|
|
2
|
|
436
|
use namespace::clean; |
|
2
|
|
|
|
|
12395
|
|
|
2
|
|
|
|
|
13
|
|
5
|
2
|
|
|
2
|
|
703
|
use Catmandu::Sane; |
|
2
|
|
|
|
|
64324
|
|
|
2
|
|
|
|
|
13
|
|
6
|
2
|
|
|
2
|
|
1395
|
use Furl; |
|
2
|
|
|
|
|
39411
|
|
|
2
|
|
|
|
|
65
|
|
7
|
2
|
|
|
2
|
|
14
|
use Moo; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
15
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
with 'Catmandu::Importer'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
has site => ( |
14
|
|
|
|
|
|
|
is => 'ro', |
15
|
|
|
|
|
|
|
coerce => sub { |
16
|
|
|
|
|
|
|
my ($site) = @_; |
17
|
|
|
|
|
|
|
if ($site =~ /^[a-z]+([_-][a-z])*$/) { |
18
|
|
|
|
|
|
|
$site =~ s/-/_/g; |
19
|
|
|
|
|
|
|
$site = "http://$site.wikipedia.org/"; |
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
return $site; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
has page => ( |
26
|
|
|
|
|
|
|
is => 'ro' |
27
|
|
|
|
|
|
|
); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
has template => ( |
30
|
|
|
|
|
|
|
is => 'ro' |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has wikilinks => ( |
34
|
|
|
|
|
|
|
is => 'ro', |
35
|
|
|
|
|
|
|
default => sub { 1 } |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
has tempname => ( |
39
|
|
|
|
|
|
|
is => 'ro', |
40
|
|
|
|
|
|
|
default => sub { 'TEMPLATE' } |
41
|
|
|
|
|
|
|
); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub generator { |
45
|
|
|
|
|
|
|
my ($self) = @_; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub { |
48
|
|
|
|
|
|
|
state $templates = $self->_extract; |
49
|
|
|
|
|
|
|
return unless $templates and @$templates; |
50
|
|
|
|
|
|
|
return shift @$templates; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub _extract { |
55
|
1
|
|
|
1
|
|
1
|
my ($self) = @_; |
56
|
1
|
|
|
|
|
2
|
my $text = ""; |
57
|
|
|
|
|
|
|
|
58
|
1
|
50
|
|
|
|
5
|
if ($self->site) { |
59
|
0
|
|
|
|
|
0
|
my $client = Furl->new; |
60
|
0
|
0
|
|
|
|
0
|
if (defined $self->page) { |
61
|
0
|
|
|
|
|
0
|
my $page = $self->page; |
62
|
0
|
|
|
|
|
0
|
my $url = $self->site . "wiki/$page?action=raw"; |
63
|
0
|
|
|
|
|
0
|
my $res = $client->get($url); |
64
|
0
|
0
|
|
|
|
0
|
if ($res->is_success) { |
65
|
0
|
|
|
|
|
0
|
$text = $res->decoded_content; |
66
|
|
|
|
|
|
|
} else { |
67
|
0
|
|
|
|
|
0
|
die "failed to get $url"; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
} else { |
70
|
|
|
|
|
|
|
# TODO: read pages from input unless page is set |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
} else { |
73
|
1
|
|
|
|
|
5
|
my $fh = $self->fh; |
74
|
1
|
|
|
|
|
834
|
$text = do { local $/; <$fh> }; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
50
|
|
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# TODO: add PAGE if page input mode set |
78
|
|
|
|
|
|
|
|
79
|
1
|
|
|
|
|
5
|
$self->_extract_template($text); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Parse arguments of one template call |
83
|
|
|
|
|
|
|
sub _template () { |
84
|
12
|
|
|
12
|
|
22
|
my ($self, $result, $name, $parameters) = @_; |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# {{foo bar}} calls Template:Foo_bar with upper case F. |
87
|
|
|
|
|
|
|
# Might not work for non-ASCII characters. |
88
|
12
|
|
|
|
|
18
|
$name = ucfirst($name); |
89
|
12
|
|
|
|
|
21
|
$name =~ s/ /_/g; |
90
|
|
|
|
|
|
|
|
91
|
12
|
|
|
|
|
14
|
my $template = { }; |
92
|
|
|
|
|
|
|
|
93
|
12
|
100
|
|
|
|
23
|
if (defined ($parameters)) { |
94
|
10
|
|
|
|
|
24
|
my ($field, $value); |
95
|
10
|
|
|
|
|
9
|
my $argc = 0; |
96
|
10
|
|
|
|
|
110
|
$parameters =~ s/^\|\s*(.*?)\s*$/$1/; |
97
|
10
|
|
|
|
|
129
|
foreach my $arg (split(/\s*\|\s*/, $parameters)) { |
98
|
51
|
|
|
|
|
28
|
$argc++; |
99
|
51
|
100
|
|
|
|
140
|
if ($arg =~ /^([^=]*?)\s*=\s*(.*)$/) { |
100
|
46
|
|
|
|
|
46
|
$field = $1; |
101
|
46
|
|
|
|
|
43
|
$value = $2; |
102
|
|
|
|
|
|
|
} else { |
103
|
5
|
|
|
|
|
2
|
$field = $argc; |
104
|
5
|
|
|
|
|
6
|
$value = $arg; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
51
|
50
|
|
|
|
93
|
if (!$self->wikilinks) { |
|
|
50
|
|
|
|
|
|
108
|
0
|
0
|
|
|
|
0
|
$value =~ s/\[\[([^\]]*?)(\$!([^\]]*))?\]\]/$2 ? $3 : $1/eg; |
|
0
|
|
|
|
|
0
|
|
109
|
|
|
|
|
|
|
} elsif ($self->wikilinks == 2) { |
110
|
0
|
|
|
|
|
0
|
$value =~ s/\[\[([^\]]*?)(\$!([^\]]*))?\]\]/$1/eg; |
|
0
|
|
|
|
|
0
|
|
111
|
|
|
|
|
|
|
} else { |
112
|
51
|
100
|
|
|
|
58
|
$value =~ s/\[\[([^\]]*?)(\$!([^\]]*))?\]\]/$2 ? "[[$1|$3]]" : "[[$1]]"/eg; |
|
3
|
|
|
|
|
14
|
|
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
51
|
|
|
|
|
108
|
$template->{$field} = $value; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
12
|
50
|
|
|
|
23
|
if (!defined $self->template) { |
|
|
0
|
|
|
|
|
|
120
|
12
|
|
|
|
|
22
|
$template->{$self->tempname} = $name; |
121
|
12
|
|
|
|
|
13
|
push @$result, $template; |
122
|
|
|
|
|
|
|
} elsif ($self->template eq $name) { |
123
|
0
|
|
|
|
|
0
|
push @$result, $template; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
12
|
|
|
|
|
166
|
return "\@($name)"; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub _extract_template { |
130
|
1
|
|
|
1
|
|
3
|
my ($self, $text) = @_; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Perform various substitutions to get rid of troublesome wiki markup. |
133
|
|
|
|
|
|
|
# In its place, leave $something |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# silently drop HTML comments |
136
|
1
|
|
|
|
|
50
|
$text =~ s/<!--.*?-->//g; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# ignore nowiki, non-greedy match, leave $nowiki |
139
|
1
|
|
|
|
|
46
|
$text =~ s/<nowiki>.*?<\/nowiki>/\$nowiki/g; |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# ignore math, non-greedy match, leave $math |
142
|
1
|
|
|
|
|
45
|
$text =~ s/<math>.*?<\/math>/\$math/g; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# wiki link with alternative text, leave $! |
145
|
|
|
|
|
|
|
# multiple passes handle image thumbnails |
146
|
1
|
|
|
|
|
7
|
for (my $i = 0; $i < 5; $i++) { |
147
|
5
|
|
|
|
|
458
|
$text =~ s/(\[\[[^\]\|{}]*)\|([^\]{}]*\]\])/$1\$!$2/g; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# These are not real template calls, leave $pagename |
151
|
1
|
|
|
|
|
56
|
$text =~ s/{{(CURRENT(DAY|DOW|MONTH|TIME(STAMP)?|VERSION|WEEK|YEAR)(ABBREV|NAME(GEN)?)?|(ARTICLE|NAME|SUBJECT|TALK)SPACE|NUMBEROF(ADMINS|ARTICLES|FILES|PAGES|USERS)(:R)?|(ARTICLE|BASE|FULL|SUB|SUBJECT|TALK)?PAGENAMEE?|REVISIONID|SCRIPTPATH|SERVER(NAME)?|SITENAME)}}/\$$1/g; |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# template parameter value with default, leave $! |
154
|
1
|
|
|
|
|
56
|
$text =~ s/{{{([^\|{}]*)\|([^{}]*)}}}/\$($1\$!$2)/g; |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# template parameter values, leave $parameter |
157
|
1
|
|
|
|
|
55
|
$text =~ s/{{{([^{}]*)}}}/\$($1)/g; |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# template bang escape, leave $! |
160
|
1
|
|
|
|
|
50
|
$text =~ s/{{!}}/\$!/g; |
161
|
|
|
|
|
|
|
|
162
|
1
|
|
|
|
|
2
|
my $result = []; |
163
|
1
|
|
|
|
|
5
|
my $tempname = $self->tempname; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# multiple passes handle nested template calls |
166
|
1
|
|
|
|
|
3
|
for (my $i = 0; $i < 5; $i++) { |
167
|
5
|
|
|
|
|
211
|
$text =~ s/{{\s*([^\|{}]*?)\s*(\|[^{}]*)?}}/&_template($self,$result,$1,$2)/eg; |
|
12
|
|
|
|
|
21
|
|
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
1
|
|
|
|
|
3
|
return $result; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
1; |
174
|
|
|
|
|
|
|
__END__ |