line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# WebFetch::Input::SiteNews |
2
|
|
|
|
|
|
|
# ABSTRACT: download and save SiteNews headlines from a local file |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Copyright (c) 1998-2022 Ian Kluft. This program is free software; you can |
5
|
|
|
|
|
|
|
# redistribute it and/or modify it under the terms of the GNU General Public |
6
|
|
|
|
|
|
|
# License Version 3. See https://www.gnu.org/licenses/gpl-3.0-standalone.html |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# pragmas to silence some warnings from Perl::Critic |
9
|
|
|
|
|
|
|
## no critic (Modules::RequireExplicitPackage) |
10
|
|
|
|
|
|
|
# This solves a catch-22 where parts of Perl::Critic want both package and use-strict to be first |
11
|
3
|
|
|
3
|
|
2082
|
use strict; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
100
|
|
12
|
3
|
|
|
3
|
|
15
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
87
|
|
13
|
3
|
|
|
3
|
|
15
|
use utf8; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
29
|
|
14
|
|
|
|
|
|
|
## use critic (Modules::RequireExplicitPackage) |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
package WebFetch::Input::SiteNews; |
17
|
|
|
|
|
|
|
$WebFetch::Input::SiteNews::VERSION = '0.15.9'; |
18
|
3
|
|
|
3
|
|
181
|
use base "WebFetch"; |
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
322
|
|
19
|
|
|
|
|
|
|
|
20
|
3
|
|
|
3
|
|
19
|
use Carp; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
208
|
|
21
|
3
|
|
|
3
|
|
33
|
use Readonly; |
|
3
|
|
|
|
|
13
|
|
|
3
|
|
|
|
|
196
|
|
22
|
3
|
|
|
3
|
|
21
|
use Scalar::Util qw(reftype); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
168
|
|
23
|
3
|
|
|
3
|
|
17
|
use DateTime; |
|
3
|
|
|
|
|
14
|
|
|
3
|
|
|
|
|
93
|
|
24
|
3
|
|
|
3
|
|
25
|
use DateTime::Format::ISO8601; |
|
3
|
|
|
|
|
19
|
|
|
3
|
|
|
|
|
8740
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# set defaults |
27
|
|
|
|
|
|
|
my ( $cat_priorities, $now ); |
28
|
|
|
|
|
|
|
Readonly::Array my @Options => ( "short_path|short=s", "long_path|long=s", ); |
29
|
|
|
|
|
|
|
Readonly::Scalar my $Usage => "--short short-output-file --long long-output-file"; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# configuration parameters |
32
|
|
|
|
|
|
|
Readonly::Scalar my $num_links => 5; |
33
|
|
|
|
|
|
|
Readonly::Array my @dt_keys => qw(locale time_zone); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# functions for access to internal data for testing |
36
|
|
|
|
|
|
|
## no critic (Subroutines::ProhibitUnusedPrivateSubroutines) |
37
|
|
|
|
|
|
|
sub _config_params |
38
|
|
|
|
|
|
|
{ |
39
|
4
|
|
|
4
|
|
44
|
return { Options => \@Options, Usage => $Usage, num_links => $num_links }; |
40
|
|
|
|
|
|
|
} |
41
|
0
|
|
|
0
|
|
0
|
sub _cat_priorities { return $cat_priorities; } |
42
|
|
|
|
|
|
|
## critic (Subroutines::ProhibitUnusedPrivateSubroutines) |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# no user-servicable parts beyond this point |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# register capabilities with WebFetch |
47
|
|
|
|
|
|
|
__PACKAGE__->module_register( _config_params(), "cmdline", "input:sitenews", "output:sitenews_long" ); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# constants for state names |
50
|
299
|
|
|
299
|
0
|
585
|
sub initial_state { return 0; } |
51
|
184
|
|
|
184
|
0
|
426
|
sub attr_state { return 1; } |
52
|
234
|
|
|
234
|
0
|
587
|
sub text_state { return 2; } |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# fetch() is the WebFetch API call point to run this module |
55
|
|
|
|
|
|
|
sub fetch |
56
|
|
|
|
|
|
|
{ |
57
|
2
|
|
|
2
|
1
|
6
|
my ($self) = @_; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# set parameters for WebFetch routines |
60
|
2
|
50
|
|
|
|
7
|
if ( not defined $self->{num_links} ) { |
61
|
2
|
|
|
|
|
9
|
$self->set_param( "num_links", WebFetch->config("num_links") ); |
62
|
|
|
|
|
|
|
} |
63
|
2
|
|
|
|
|
7
|
$self->set_param( "para", 1 ); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# set up Webfetch Embedding API data |
66
|
2
|
|
|
|
|
4
|
$self->{actions} = {}; |
67
|
2
|
|
|
|
|
14
|
$self->data->add_fields( "date", "title", "priority", "expired", "position", "label", "url", "category", "text" ); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# defined which fields match to which "well-known field names" |
70
|
2
|
|
|
|
|
11
|
$self->data->add_wk_names( |
71
|
|
|
|
|
|
|
"title" => "title", |
72
|
|
|
|
|
|
|
"url" => "url", |
73
|
|
|
|
|
|
|
"date" => "date", |
74
|
|
|
|
|
|
|
"summary" => "text", |
75
|
|
|
|
|
|
|
"category" => "category" |
76
|
|
|
|
|
|
|
); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# process the links |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# get local time for various date comparisons |
81
|
2
|
|
|
|
|
4
|
my %dt_opts; |
82
|
2
|
50
|
33
|
|
|
10
|
if ( exists $self->{datetime_settings} and reftype( $self->{datetime_settings} ) eq "HASH" ) { |
83
|
0
|
|
|
|
|
0
|
%dt_opts = %{ $self->{datetime_settings} }; # get locale and time_zone settings if available |
|
0
|
|
|
|
|
0
|
|
84
|
|
|
|
|
|
|
} |
85
|
2
|
50
|
|
|
|
6
|
if ( exists $self->{testing_faketime} ) { |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# use a pre-specified timestamp for testing purposes so news elements are same age as expected result |
88
|
2
|
|
|
|
|
24
|
$now = DateTime::Format::ISO8601->parse_datetime( $self->{testing_faketime}, %dt_opts ); |
89
|
2
|
50
|
|
|
|
1729
|
if ( exists $dt_opts{locale} ) { |
90
|
0
|
|
|
|
|
0
|
$now->set_locale( $dt_opts{locale} ); |
91
|
|
|
|
|
|
|
} |
92
|
2
|
50
|
|
|
|
8
|
if ( exists $dt_opts{time_zone} ) { |
93
|
0
|
|
|
|
|
0
|
$now->set_time_zone( $dt_opts{time_zone} ); |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
} else { |
96
|
0
|
|
|
|
|
0
|
$now = DateTime->now(%dt_opts); |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# parse data file(s) |
100
|
2
|
|
|
|
|
4
|
my @sources; |
101
|
2
|
50
|
33
|
|
|
14
|
if ( ( exists $self->{sources} ) and ( ref $self->{sources} eq "ARRAY" ) ) { |
102
|
0
|
|
|
|
|
0
|
@sources = @{ $self->{sources} }; |
|
0
|
|
|
|
|
0
|
|
103
|
|
|
|
|
|
|
} |
104
|
2
|
50
|
|
|
|
8
|
if ( exists $self->{source} ) { |
105
|
2
|
|
|
|
|
7
|
push @sources, $self->{source}; |
106
|
|
|
|
|
|
|
} |
107
|
2
|
|
|
|
|
7
|
foreach my $source (@sources) { |
108
|
2
|
|
|
|
|
6
|
$self->parse_input($source); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# set parameters for the short news format |
112
|
2
|
50
|
|
|
|
10
|
if ( defined $self->{short_path} ) { |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# create the HTML actions list |
115
|
2
|
|
|
|
|
10
|
$self->{actions}{html} = []; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# create the HTML-generation parameters |
118
|
2
|
|
|
|
|
5
|
my $params = {}; |
119
|
2
|
|
|
|
|
6
|
$params = {}; |
120
|
|
|
|
|
|
|
$params->{sort_func} = sub { |
121
|
48
|
|
|
48
|
|
79
|
my ( $a, $b ) = @_; |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# sort/compare news entries for the short display |
124
|
|
|
|
|
|
|
# sorting priority: |
125
|
|
|
|
|
|
|
# expiration status first (expired items last) |
126
|
|
|
|
|
|
|
# priority second (category/age combo) |
127
|
|
|
|
|
|
|
# label third (chronological order) |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# check expirations first |
130
|
48
|
|
|
|
|
117
|
my $exp_fnum = $self->fname2fnum("expired"); |
131
|
48
|
50
|
33
|
|
|
111
|
( $a->[$exp_fnum] and not $b->[$exp_fnum] ) and return 1; |
132
|
48
|
50
|
33
|
|
|
158
|
( not $a->[$exp_fnum] and $b->[$exp_fnum] ) and return -1; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# compare priority - posting category w/ age penalty |
135
|
48
|
|
|
|
|
97
|
my $pri_fnum = $self->fname2fnum("priority"); |
136
|
48
|
100
|
|
|
|
117
|
if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) { |
137
|
18
|
|
|
|
|
48
|
return $a->[$pri_fnum] <=> $b->[$pri_fnum]; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# otherwise sort by label (chronological order) |
141
|
30
|
|
|
|
|
71
|
my $lbl_fnum = $self->fname2fnum("label"); |
142
|
30
|
|
|
|
|
84
|
return $a->[$lbl_fnum] cmp $b->[$lbl_fnum]; |
143
|
2
|
|
|
|
|
15
|
}; |
144
|
|
|
|
|
|
|
$params->{filter_func} = sub { |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# filter: skip expired items |
147
|
33
|
|
|
33
|
|
64
|
my $exp_fnum = $self->fname2fnum("expired"); |
148
|
33
|
|
|
|
|
84
|
return not $_[$exp_fnum]; |
149
|
2
|
|
|
|
|
9
|
}; |
150
|
|
|
|
|
|
|
$params->{format_func} = sub { |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# generate HTML text |
153
|
10
|
|
|
10
|
|
22
|
my $txt_fnum = $self->fname2fnum("text"); |
154
|
10
|
|
|
|
|
28
|
my $pri_fnum = $self->fname2fnum("priority"); |
155
|
10
|
|
|
|
|
65
|
return $_[$txt_fnum] . "\n<!--- priority " . $_[$pri_fnum] . " --->"; |
156
|
2
|
|
|
|
|
8
|
}; |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# put parameters for fmt_handler_html() on the html list |
159
|
2
|
|
|
|
|
4
|
push @{ $self->{actions}{html} }, [ $self->{short_path}, $params ]; |
|
2
|
|
|
|
|
9
|
|
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# set parameters for the long news format |
163
|
2
|
50
|
|
|
|
7
|
if ( defined $self->{long_path} ) { |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# create the SiteNews-specific action list |
166
|
|
|
|
|
|
|
# It will use WebFetch::Input::SiteNews::fmt_handler_sitenews_long() |
167
|
|
|
|
|
|
|
# which is defined in this file |
168
|
2
|
|
|
|
|
7
|
$self->{actions}{sitenews_long} = []; |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# put parameters for fmt_handler_sitenews_long() on the list |
171
|
2
|
|
|
|
|
4
|
push @{ $self->{actions}{sitenews_long} }, [ $self->{long_path} ]; |
|
2
|
|
|
|
|
7
|
|
172
|
|
|
|
|
|
|
} |
173
|
2
|
|
|
|
|
6
|
return; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# inner portion of input parsing - called by parse_input() |
177
|
|
|
|
|
|
|
# read SiteNews text file and parse it into news items to return to caller |
178
|
|
|
|
|
|
|
sub parse_input_inner |
179
|
|
|
|
|
|
|
{ |
180
|
2
|
|
|
2
|
0
|
8
|
my ( $self, $news_data_fd ) = @_; |
181
|
2
|
|
|
|
|
3
|
my @news_items; |
182
|
2
|
|
|
|
|
6
|
my $position = 0; |
183
|
2
|
|
|
|
|
8
|
my $state = initial_state; # before first entry |
184
|
2
|
|
|
|
|
4
|
my ($current); |
185
|
2
|
|
|
|
|
10
|
$cat_priorities = {}; # priorities for sorting |
186
|
2
|
|
|
|
|
79
|
while (<$news_data_fd>) { |
187
|
233
|
|
|
|
|
380
|
chomp; |
188
|
233
|
100
|
|
|
|
462
|
/^\s*\#/x and next; # skip comments |
189
|
228
|
100
|
|
|
|
722
|
/^\s*$/x and next; # skip blank lines |
190
|
|
|
|
|
|
|
|
191
|
192
|
100
|
|
|
|
423
|
if (/^[^\s]/x) { |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# found attribute line |
194
|
107
|
100
|
|
|
|
175
|
if ( $state == initial_state ) { |
195
|
10
|
100
|
|
|
|
55
|
if (/^categories:\s*(.*)/x) { |
|
|
100
|
|
|
|
|
|
196
|
2
|
|
|
|
|
22
|
my @cats = split( ' ', $1 ); |
197
|
2
|
|
|
|
|
6
|
my ($i); |
198
|
2
|
|
|
|
|
11
|
$cat_priorities->{"default"} = 999; |
199
|
2
|
|
|
|
|
11
|
for ( $i = 0 ; $i <= $#cats ; $i++ ) { |
200
|
8
|
|
|
|
|
37
|
$cat_priorities->{ $cats[$i] } = $i + 1; |
201
|
|
|
|
|
|
|
} |
202
|
2
|
|
|
|
|
18
|
next; |
203
|
|
|
|
|
|
|
} elsif (/^(\w+):\s*(.*)/x) { |
204
|
6
|
|
|
|
|
22
|
$self->set_param( $1, $2 ); |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
} |
207
|
105
|
100
|
100
|
|
|
165
|
if ( $state == initial_state or $state == text_state ) { |
|
|
50
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# found first attribute of a new entry |
210
|
39
|
100
|
|
|
|
174
|
if (/^([^=]+)=(.*)/x) { |
211
|
33
|
|
|
|
|
54
|
$current = {}; |
212
|
33
|
|
|
|
|
91
|
$current->{position} = $position++; |
213
|
33
|
|
|
|
|
89
|
$current->{$1} = $2; |
214
|
33
|
|
|
|
|
61
|
push( @news_items, $current ); |
215
|
33
|
|
|
|
|
58
|
$state = attr_state; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
} elsif ( $state == attr_state ) { |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# found a followup attribute |
220
|
66
|
50
|
|
|
|
202
|
if (/^([^=]+)=(.*)/x) { |
221
|
66
|
|
|
|
|
304
|
$current->{$1} = $2; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} else { |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# found text line |
227
|
85
|
50
|
66
|
|
|
132
|
if ( $state == initial_state ) { |
|
|
50
|
|
|
|
|
|
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# cannot accept text before any attributes |
230
|
0
|
|
|
|
|
0
|
next; |
231
|
|
|
|
|
|
|
} elsif ( $state == attr_state or $state == text_state ) { |
232
|
85
|
100
|
|
|
|
140
|
if ( defined $current->{text} ) { |
233
|
52
|
|
|
|
|
125
|
$current->{text} .= "\n$_"; |
234
|
|
|
|
|
|
|
} else { |
235
|
33
|
|
|
|
|
69
|
$current->{text} = $_; |
236
|
|
|
|
|
|
|
} |
237
|
85
|
|
|
|
|
124
|
$state = text_state; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
} |
241
|
2
|
|
|
|
|
16
|
return @news_items; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# parse input file |
245
|
|
|
|
|
|
|
sub parse_input |
246
|
|
|
|
|
|
|
{ |
247
|
2
|
|
|
2
|
0
|
9
|
my ( $self, $input ) = @_; |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# parse data file |
250
|
2
|
|
|
|
|
4
|
my $news_data; |
251
|
2
|
50
|
|
|
|
153
|
if ( not open( $news_data, "<", $input ) ) { |
252
|
0
|
|
|
|
|
0
|
croak "$0: failed to open $input: $!\n"; |
253
|
|
|
|
|
|
|
} |
254
|
2
|
|
|
|
|
13
|
my @news_items = $self->parse_input_inner($news_data); |
255
|
2
|
|
|
|
|
26
|
close $news_data; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# translate parsed news into the WebFetch Embedding API data table |
258
|
2
|
|
|
|
|
7
|
my ( %label_hash, $pos ); |
259
|
2
|
|
|
|
|
7
|
$pos = 0; |
260
|
2
|
|
|
|
|
5
|
foreach my $item (@news_items) { |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# collect fields for the data record |
263
|
33
|
50
|
|
|
|
100
|
my $title = ( defined $item->{title} ) ? $item->{title} : ""; |
264
|
33
|
50
|
|
|
|
90
|
my $posted = ( defined $item->{posted} ) ? $item->{posted} : ""; |
265
|
33
|
50
|
|
|
|
70
|
my $category = ( defined $item->{category} ) ? $item->{category} : ""; |
266
|
33
|
50
|
|
|
|
116
|
my $text = ( defined $item->{text} ) ? $item->{text} : ""; |
267
|
|
|
|
|
|
|
my $url_prefix = |
268
|
33
|
50
|
|
|
|
75
|
( defined $self->{url_prefix} ) ? $self->{url_prefix} : ""; |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# timestamp processing using optional locale and time_zone from WebFetch object's datetime_settings hash |
271
|
|
|
|
|
|
|
# This is usually set by SiteNews file's global settings at the top |
272
|
33
|
|
|
|
|
49
|
my ( %dt_opts, $time_str, $anchor_time ); |
273
|
33
|
50
|
33
|
|
|
182
|
if ( exists $self->{datetime_settings} and reftype $self->{datetime_settings} eq "HASH" ) { |
274
|
33
|
|
|
|
|
52
|
%dt_opts = %{ $self->{datetime_settings} }; # get locale and time_zone settings if available |
|
33
|
|
|
|
|
119
|
|
275
|
|
|
|
|
|
|
} |
276
|
33
|
50
|
|
|
|
76
|
if ($posted) { |
277
|
33
|
|
|
|
|
94
|
my $date_ref = WebFetch::parse_date( \%dt_opts, $posted ); |
278
|
33
|
100
|
|
|
|
111
|
if ( defined $date_ref ) { |
279
|
8
|
100
|
|
|
|
64
|
my $dt = ( ref $date_ref eq "DateTime" ) ? $date_ref : DateTime->new( @$date_ref, %dt_opts ); |
280
|
8
|
|
|
|
|
16388
|
$time_str = WebFetch::gen_timestamp( \%dt_opts, $dt ); |
281
|
8
|
|
|
|
|
8521
|
$anchor_time = WebFetch::anchor_timestr( \%dt_opts, $dt ); |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |
284
|
33
|
100
|
66
|
|
|
346
|
if ( not defined $time_str or not defined $anchor_time ) { |
285
|
25
|
|
|
|
|
34
|
$time_str = "undated"; |
286
|
25
|
|
|
|
|
41
|
$anchor_time = "0000-undated"; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# generate an intra-page link label |
290
|
33
|
|
|
|
|
49
|
my ( $label, $count ); |
291
|
33
|
|
|
|
|
52
|
$count = 0; |
292
|
33
|
|
66
|
|
|
222
|
while ( ( $label = $anchor_time . "-" . sprintf( "%03d", $count ) ) |
293
|
|
|
|
|
|
|
and defined $label_hash{$label} ) |
294
|
|
|
|
|
|
|
{ |
295
|
300
|
|
|
|
|
983
|
$count++; |
296
|
|
|
|
|
|
|
} |
297
|
33
|
|
|
|
|
113
|
$label_hash{$label} = 1; |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# generate data record for output |
300
|
33
|
|
|
|
|
121
|
$self->data->add_record( |
301
|
|
|
|
|
|
|
$time_str, $title, $self->priority($item), |
302
|
|
|
|
|
|
|
expired($item), $pos, $label, $url_prefix . "#" . $label, |
303
|
|
|
|
|
|
|
$category, $text |
304
|
|
|
|
|
|
|
); |
305
|
33
|
|
|
|
|
102
|
$pos++; |
306
|
|
|
|
|
|
|
} |
307
|
2
|
|
|
|
|
39
|
return; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# format handler function specific to this module's long-news output format |
311
|
|
|
|
|
|
|
sub fmt_handler_sitenews_long |
312
|
|
|
|
|
|
|
{ |
313
|
|
|
|
|
|
|
# TODO move sort block to separate function and remove the no-critic exemption |
314
|
|
|
|
|
|
|
## no critic ( BuiltinFunctions::RequireSimpleSortBlock ) |
315
|
2
|
|
|
2
|
0
|
5
|
my ($self) = @_; |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# sort events for long display |
318
|
|
|
|
|
|
|
my @long_news = sort { |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# sort news entries for long display |
321
|
|
|
|
|
|
|
# sorting priority: |
322
|
|
|
|
|
|
|
# date first |
323
|
|
|
|
|
|
|
# category/priority second |
324
|
|
|
|
|
|
|
# reverse file order last |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# sort by date |
327
|
37
|
|
|
|
|
81
|
my $lbl_fnum = $self->fname2fnum("label"); |
328
|
37
|
|
|
|
|
84
|
my ( $a_date, $b_date ) = ( $a->[$lbl_fnum], $b->[$lbl_fnum] ); |
329
|
37
|
|
|
|
|
134
|
$a_date =~ s/-.*//x; |
330
|
37
|
|
|
|
|
101
|
$b_date =~ s/-.*//x; |
331
|
37
|
100
|
|
|
|
88
|
if ( $a_date ne $b_date ) { |
332
|
7
|
|
|
|
|
17
|
return $b_date cmp $a_date; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# sort by priority (within same date) |
336
|
30
|
|
|
|
|
66
|
my $pri_fnum = $self->fname2fnum("priority"); |
337
|
30
|
100
|
|
|
|
71
|
if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) { |
338
|
1
|
|
|
|
|
5
|
return $a->[$pri_fnum] <=> $b->[$pri_fnum]; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# sort by chronological order (within same date and priority) |
342
|
29
|
|
|
|
|
81
|
return $a->[$lbl_fnum] cmp $b->[$lbl_fnum]; |
343
|
2
|
|
|
|
|
4
|
} @{ $self->{data}{records} }; |
|
2
|
|
|
|
|
19
|
|
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# process the links for the long list |
346
|
2
|
|
|
|
|
5
|
my ( @long_text, $prev, $url_prefix, $i ); |
347
|
|
|
|
|
|
|
$url_prefix = |
348
|
|
|
|
|
|
|
( defined $self->{url_prefix} ) |
349
|
|
|
|
|
|
|
? $self->{url_prefix} |
350
|
2
|
50
|
|
|
|
8
|
: ""; |
351
|
2
|
|
|
|
|
7
|
$prev = undef; |
352
|
2
|
|
|
|
|
8
|
push @long_text, "<dl>"; |
353
|
2
|
|
|
|
|
7
|
my $lbl_fnum = $self->fname2fnum("label"); |
354
|
2
|
|
|
|
|
7
|
my $date_fnum = $self->fname2fnum("date"); |
355
|
2
|
|
|
|
|
6
|
my $title_fnum = $self->fname2fnum("title"); |
356
|
2
|
|
|
|
|
7
|
my $txt_fnum = $self->fname2fnum("text"); |
357
|
2
|
|
|
|
|
13
|
my $exp_fnum = $self->fname2fnum("expired"); |
358
|
2
|
|
|
|
|
7
|
my $pri_fnum = $self->fname2fnum("priority"); |
359
|
|
|
|
|
|
|
|
360
|
2
|
|
|
|
|
16
|
for ( $i = 0 ; $i <= $#long_news ; $i++ ) { |
361
|
33
|
|
|
|
|
48
|
my $news = $long_news[$i]; |
362
|
33
|
100
|
100
|
|
|
95
|
if ( ( !defined $prev->[$date_fnum] ) |
363
|
|
|
|
|
|
|
or $prev->[$date_fnum] ne $news->[$date_fnum] ) |
364
|
|
|
|
|
|
|
{ |
365
|
9
|
|
|
|
|
22
|
push @long_text, "<dt>" . $news->[$date_fnum]; |
366
|
9
|
|
|
|
|
17
|
push @long_text, "<dd>"; |
367
|
|
|
|
|
|
|
} |
368
|
33
|
50
|
|
|
|
185
|
push @long_text, |
369
|
|
|
|
|
|
|
"<a name=\"" |
370
|
|
|
|
|
|
|
. $news->[$lbl_fnum] . "\">" |
371
|
|
|
|
|
|
|
. $news->[$title_fnum] |
372
|
|
|
|
|
|
|
. "</a>\n" |
373
|
|
|
|
|
|
|
. $news->[$txt_fnum] |
374
|
|
|
|
|
|
|
. "<!--- priority: " |
375
|
|
|
|
|
|
|
. $news->[$pri_fnum] |
376
|
|
|
|
|
|
|
. ( $news->[$exp_fnum] ? " expired" : "" ) . " --->"; |
377
|
33
|
|
|
|
|
55
|
push @long_text, "<p>"; |
378
|
33
|
|
|
|
|
69
|
$prev = $news; |
379
|
|
|
|
|
|
|
} |
380
|
2
|
|
|
|
|
11
|
push @long_text, "</dl>"; |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# store it for later save to disk |
383
|
2
|
|
|
|
|
165
|
$self->html_savable( $self->{long_path}, join( "\n", @long_text ) . "\n" ); |
384
|
2
|
|
|
|
|
18
|
return; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# |
388
|
|
|
|
|
|
|
# utility functions |
389
|
|
|
|
|
|
|
# |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# function to detect if a news entry is expired |
392
|
|
|
|
|
|
|
sub expired |
393
|
|
|
|
|
|
|
{ |
394
|
33
|
|
|
33
|
0
|
64
|
my ($entry) = @_; |
395
|
33
|
50
|
|
|
|
233
|
return 0 if not exists $entry->{expires}; |
396
|
0
|
0
|
|
|
|
0
|
return 0 if not defined $entry->{expires}; |
397
|
0
|
|
|
|
|
0
|
return $entry->{expires} < $now; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# function to compute a priority value which decays with age |
401
|
|
|
|
|
|
|
sub priority |
402
|
|
|
|
|
|
|
{ |
403
|
33
|
|
|
33
|
0
|
67
|
my ( $self, $entry ) = @_; |
404
|
|
|
|
|
|
|
|
405
|
33
|
50
|
|
|
|
79
|
return 999 if not exists $entry->{posted}; |
406
|
33
|
50
|
|
|
|
65
|
return 999 if not defined $entry->{posted}; |
407
|
|
|
|
|
|
|
|
408
|
33
|
|
|
|
|
81
|
my $date_ref = WebFetch::parse_date( $entry->{posted} ); |
409
|
33
|
100
|
|
|
|
116
|
return 999 if not defined $date_ref; |
410
|
|
|
|
|
|
|
|
411
|
8
|
|
|
|
|
12
|
my %dt_opts; |
412
|
8
|
50
|
33
|
|
|
56
|
if ( exists $self->{datetime_settings} and reftype( $self->{datetime_settings} ) eq "HASH" ) { |
413
|
8
|
|
|
|
|
19
|
%dt_opts = %{ $self->{datetime_settings} }; # get locale and time_zone settings if available |
|
8
|
|
|
|
|
28
|
|
414
|
|
|
|
|
|
|
} |
415
|
8
|
100
|
|
|
|
32
|
my $dt = ( ref $date_ref eq "DateTime" ) ? $date_ref : DateTime->new( @$date_ref, %dt_opts ); |
416
|
8
|
|
|
|
|
724
|
my $age = ( $now->subtract_datetime($dt) )->delta_days(); |
417
|
8
|
|
|
|
|
2576
|
my $bonus = 0; |
418
|
|
|
|
|
|
|
|
419
|
8
|
50
|
|
|
|
23
|
if ( $age <= 2 ) { |
420
|
0
|
|
|
|
|
0
|
$bonus -= 2 - $age; |
421
|
|
|
|
|
|
|
} |
422
|
8
|
100
|
66
|
|
|
46
|
if ( ( defined $entry->{category} ) |
423
|
|
|
|
|
|
|
and ( defined $cat_priorities->{ $entry->{category} } ) ) |
424
|
|
|
|
|
|
|
{ |
425
|
|
|
|
|
|
|
my $cat_pri = |
426
|
|
|
|
|
|
|
( exists $cat_priorities->{ $entry->{category} } ) |
427
|
|
|
|
|
|
|
? $cat_priorities->{ $entry->{category} } |
428
|
7
|
50
|
|
|
|
22
|
: 0; |
429
|
7
|
|
|
|
|
51
|
return $cat_pri + $age * 0.025 + $bonus; |
430
|
|
|
|
|
|
|
} else { |
431
|
1
|
|
|
|
|
11
|
return $cat_priorities->{"default"} + $age * 0.025 + $bonus; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
1; |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=pod |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=encoding UTF-8 |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=head1 NAME |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
WebFetch::Input::SiteNews - download and save SiteNews headlines from a local file |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=head1 VERSION |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
version 0.15.9 |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=head1 SYNOPSIS |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
In perl scripts: |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
C<use WebFetch::Input::SiteNews;> |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
From the command line: |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
C<perl -w -MWebFetch::Input::SiteNews -e "&fetch_main" -- --dir directory |
458
|
|
|
|
|
|
|
--source news-file --short short-form-output-file |
459
|
|
|
|
|
|
|
--long long-form-output-file> |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head1 DESCRIPTION |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
This module gets the current headlines from a site-local file. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
The I<--source> parameter specifies a file name which contains news to be |
466
|
|
|
|
|
|
|
posted. See L<"FILE FORMAT"> below for details on contents to put in the |
467
|
|
|
|
|
|
|
file. I<--source> may be specified more than once, allowing a single news |
468
|
|
|
|
|
|
|
output to come from more than one input. For example, one file could be |
469
|
|
|
|
|
|
|
manually maintained in CVS or RCS and another could be entered from a |
470
|
|
|
|
|
|
|
web form. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
After this runs, the file C<site_news.html> will be created or replaced. |
473
|
|
|
|
|
|
|
If there already was a C<site_news.html> file, it will be moved to |
474
|
|
|
|
|
|
|
C<Osite_news.html>. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 FILE FORMAT |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
The WebFetch::Input::SiteNews data format is used to set up news for the |
479
|
|
|
|
|
|
|
local web site and allow other sites to import it via WebFetch. |
480
|
|
|
|
|
|
|
The file is plain text containing comments and the news items. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
There are three forms of outputs generated from these news files. |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
The I<"short news" output> is a small number (5 by default) |
485
|
|
|
|
|
|
|
of HTML text and links used for display in a small news window. |
486
|
|
|
|
|
|
|
This list takes into account expiration dates and |
487
|
|
|
|
|
|
|
priorities to pick which news entries are displayed and in what order. |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
The I<"long news" output> lists all the news entries chronologically. |
490
|
|
|
|
|
|
|
It does not take expiration or priority into account. |
491
|
|
|
|
|
|
|
It is intended for a comprehensive site news list. |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
The I<export modes> make news items available in formats other web sites |
494
|
|
|
|
|
|
|
can retrieve to post news about your site. They are chronological |
495
|
|
|
|
|
|
|
listings that omit expired items. |
496
|
|
|
|
|
|
|
They do not take priorities into account. |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=over 4 |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=item global parameters |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Lines coming before the first news item can set global parameters. |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=over 4 |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=item categories |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
A line before the first news item beginning with "categories:" contains a |
509
|
|
|
|
|
|
|
whitespace-delimited list of news category names in order from highest |
510
|
|
|
|
|
|
|
to lowest priority. |
511
|
|
|
|
|
|
|
These priority names are used by the news item attributes and |
512
|
|
|
|
|
|
|
then for sorting "short news" list items. |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=back |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=item data lines |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Non-blank non-indented non-comment lines are I<data lines>. |
519
|
|
|
|
|
|
|
Each data line contains a I<name=value> pair. |
520
|
|
|
|
|
|
|
Each group of consecutive data lines is followed by an arbitrary number |
521
|
|
|
|
|
|
|
of indented lines which contain HTML text for the news entry. |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
The recognized attributes are as follows: |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=over 4 |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=item category |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
used for prioritization, values are set by the categories global parameter |
530
|
|
|
|
|
|
|
(required) |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=item posted |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
date posted, format is a numerical date YYYYMMDD (required) |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=item expires |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
expiration date, format is a numerical date YYYYMMDD (optional) |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=item title |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
shorter title for use in news exports to other sites, |
543
|
|
|
|
|
|
|
otherwise the whole news text will be used (optional) |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=back |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=item text lines |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Intended lines are HTML text for the news item. |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=item comments |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Comments are lines beginning with "#". |
554
|
|
|
|
|
|
|
They are ignored so they can be used for human-readable information. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=back |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Note that the "short news" list has some modifications to |
559
|
|
|
|
|
|
|
priorities based on the age of the news item, |
560
|
|
|
|
|
|
|
so that the short list will favor newer items when |
561
|
|
|
|
|
|
|
they're the same priority. |
562
|
|
|
|
|
|
|
There is a sorting "priority bonus" for items less than a |
563
|
|
|
|
|
|
|
day old, which increases their priority by two priority levels. |
564
|
|
|
|
|
|
|
Day-old news items get a bonus of one priority level. |
565
|
|
|
|
|
|
|
All news items also "decay" in priority slightly every day, |
566
|
|
|
|
|
|
|
dropping a whole priority level every 40 days. |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head1 SEE ALSO |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
L<WebFetch> |
571
|
|
|
|
|
|
|
L<https://github.com/ikluft/WebFetch> |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
=head1 BUGS AND LIMITATIONS |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Please report bugs via GitHub at L<https://github.com/ikluft/WebFetch/issues> |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Patches and enhancements may be submitted via a pull request at L<https://github.com/ikluft/WebFetch/pulls> |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=head1 AUTHOR |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
Ian Kluft <https://github.com/ikluft> |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
This software is Copyright (c) 1998-2023 by Ian Kluft. |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
This is free software, licensed under: |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=cut |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
__END__ |
594
|
|
|
|
|
|
|
# POD docs follow |
595
|
|
|
|
|
|
|
|