line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# WebFetch::Input::SiteNews.pm - get headlines from a site-local file |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Copyright (c) 1998-2009 Ian Kluft. This program is free software; you can |
5
|
|
|
|
|
|
|
# redistribute it and/or modify it under the terms of the GNU General Public |
6
|
|
|
|
|
|
|
# License Version 3. See http://www.webfetch.org/GPLv3.txt |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
package WebFetch::Input::SiteNews; |
9
|
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
|
1305
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
11
|
1
|
|
|
1
|
|
5
|
use base "WebFetch"; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
90
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use Carp; |
14
|
|
|
|
|
|
|
use Date::Calc qw(Today Delta_Days Month_to_Text); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
WebFetch::Input::SiteNews - download and save SiteNews headlines |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=cut |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# set defaults |
23
|
|
|
|
|
|
|
our ( $cat_priorities, $now, $nowstamp ); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @Options = ( |
26
|
|
|
|
|
|
|
"short=s", |
27
|
|
|
|
|
|
|
"long=s", |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
our $Usage = "--short short-output-file --long long-output-file"; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# configuration parameters |
32
|
|
|
|
|
|
|
our $num_links = 5; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# no user-servicable parts beyond this point |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# register capabilities with WebFetch |
37
|
|
|
|
|
|
|
__PACKAGE__->module_register( "cmdline", "input:sitenews" ); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 SYNOPSIS |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
In perl scripts: |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
C |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
From the command line: |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
C
|
48
|
|
|
|
|
|
|
--source news-file --short short-form-output-file |
49
|
|
|
|
|
|
|
--long long-form-output-file> |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 DESCRIPTION |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
This module gets the current headlines from a site-local file. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
The I<--source> parameter specifies a file name which contains news to be |
56
|
|
|
|
|
|
|
posted. See L<"FILE FORMAT"> below for details on contents to put in the |
57
|
|
|
|
|
|
|
file. I<--source> may be specified more than once, allowing a single news |
58
|
|
|
|
|
|
|
output to come from more than one input. For example, one file could be |
59
|
|
|
|
|
|
|
manually maintained in CVS or RCS and another could be entered from a |
60
|
|
|
|
|
|
|
web form. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
After this runs, the file C will be created or replaced. |
63
|
|
|
|
|
|
|
If there already was a C file, it will be moved to |
64
|
|
|
|
|
|
|
C. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# constants for state names |
69
|
|
|
|
|
|
|
sub initial_state { 0; } |
70
|
|
|
|
|
|
|
sub attr_state { 1; } |
71
|
|
|
|
|
|
|
sub text_state { 2; } |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub fetch |
74
|
|
|
|
|
|
|
{ |
75
|
|
|
|
|
|
|
my ( $self ) = @_; |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# set parameters for WebFetch routines |
78
|
|
|
|
|
|
|
if ( !defined $self->{num_links}) { |
79
|
|
|
|
|
|
|
$self->{num_links} = $WebFetch::Input::SiteNews::num_links; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
if ( !defined $self->{style}) { |
82
|
|
|
|
|
|
|
$self->{style} = {}; |
83
|
|
|
|
|
|
|
$self->{style}{para} = 1; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# set up Webfetch Embedding API data |
87
|
|
|
|
|
|
|
$self->{actions} = {}; |
88
|
|
|
|
|
|
|
$self->data->add_fields( "date", "title", "priority", "expired", |
89
|
|
|
|
|
|
|
"position", "label", "url", "category", "text" ); |
90
|
|
|
|
|
|
|
# defined which fields match to which "well-known field names" |
91
|
|
|
|
|
|
|
$self->data->add_wk_names( |
92
|
|
|
|
|
|
|
"title" => "title", |
93
|
|
|
|
|
|
|
"url" => "url", |
94
|
|
|
|
|
|
|
"date" => "date", |
95
|
|
|
|
|
|
|
"summary" => "text", |
96
|
|
|
|
|
|
|
"category" => "category" |
97
|
|
|
|
|
|
|
); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# process the links |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# get local time for various date comparisons |
102
|
|
|
|
|
|
|
$now = [ Today ]; |
103
|
|
|
|
|
|
|
$nowstamp = sprintf "%04d%02d%02d", @$now; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# parse data file |
106
|
|
|
|
|
|
|
my $source; |
107
|
|
|
|
|
|
|
if (( exists $self->{sources}) and ( ref $self->{sources} eq "ARRAY" )) { |
108
|
|
|
|
|
|
|
foreach $source ( @{$self->{sources}}) { |
109
|
|
|
|
|
|
|
$self->parse_input( $source ); |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# set parameters for the short news format |
114
|
|
|
|
|
|
|
if ( defined $self->{short_path} ) { |
115
|
|
|
|
|
|
|
# create the HTML actions list |
116
|
|
|
|
|
|
|
$self->{actions}{html} = []; |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# create the HTML-generation parameters |
119
|
|
|
|
|
|
|
my $params = {}; |
120
|
|
|
|
|
|
|
$params = {}; |
121
|
|
|
|
|
|
|
$params->{sort_func} = sub { |
122
|
|
|
|
|
|
|
my ( $a, $b ) = @_; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# sort/compare news entries for the short display |
125
|
|
|
|
|
|
|
# sorting priority: |
126
|
|
|
|
|
|
|
# expiration status first (expired items last) |
127
|
|
|
|
|
|
|
# priority second (category/age combo) |
128
|
|
|
|
|
|
|
# label third (chronological order) |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# check expirations first |
131
|
|
|
|
|
|
|
my $exp_fnum = $self->fname2fnum("expired"); |
132
|
|
|
|
|
|
|
( $a->[$exp_fnum] and !$b->[$exp_fnum]) and return 1; |
133
|
|
|
|
|
|
|
( !$a->[$exp_fnum] and $b->[$exp_fnum]) and return -1; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# compare priority - posting category w/ age penalty |
136
|
|
|
|
|
|
|
my $pri_fnum = $self->fname2fnum("priority"); |
137
|
|
|
|
|
|
|
if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) { |
138
|
|
|
|
|
|
|
return $a->[$pri_fnum] <=> $b->[$pri_fnum]; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# otherwise sort by label (chronological order) |
142
|
|
|
|
|
|
|
my $lbl_fnum = $self->fname2fnum("label"); |
143
|
|
|
|
|
|
|
return $a->[$lbl_fnum] cmp $b->[$lbl_fnum]; |
144
|
|
|
|
|
|
|
}; |
145
|
|
|
|
|
|
|
$params->{filter_func} = sub { |
146
|
|
|
|
|
|
|
# filter: skip expired items |
147
|
|
|
|
|
|
|
my $exp_fnum = $self->fname2fnum("expired"); |
148
|
|
|
|
|
|
|
return ! $_[$exp_fnum]; |
149
|
|
|
|
|
|
|
}; |
150
|
|
|
|
|
|
|
$params->{format_func} = sub { |
151
|
|
|
|
|
|
|
# generate HTML text |
152
|
|
|
|
|
|
|
my $txt_fnum = $self->fname2fnum("text"); |
153
|
|
|
|
|
|
|
my $pri_fnum = $self->fname2fnum("priority"); |
154
|
|
|
|
|
|
|
return $_[$txt_fnum] |
155
|
|
|
|
|
|
|
."\n"; |
156
|
|
|
|
|
|
|
}; |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# put parameters for fmt_handler_html() on the html list |
159
|
|
|
|
|
|
|
push @{$self->{actions}{html}}, [ $self->{short_path}, $params ]; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# set parameters for the long news format |
163
|
|
|
|
|
|
|
if ( defined $self->{long_path} ) { |
164
|
|
|
|
|
|
|
# create the SiteNews-specific action list |
165
|
|
|
|
|
|
|
# It will use WebFetch::Input::SiteNews::fmt_handler_sitenews_long() |
166
|
|
|
|
|
|
|
# which is defined in this file |
167
|
|
|
|
|
|
|
$self->{actions}{sitenews_long} = []; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# put parameters for fmt_handler_sitenews_long() on the list |
170
|
|
|
|
|
|
|
push @{$self->{actions}{sitenews_long}}, [ $self->{long_path} ]; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# parse input file |
175
|
|
|
|
|
|
|
sub parse_input |
176
|
|
|
|
|
|
|
{ |
177
|
|
|
|
|
|
|
my ( $self, $input ) = @_; |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# parse data file |
180
|
|
|
|
|
|
|
if ( ! open ( news_data, $input )) { |
181
|
|
|
|
|
|
|
croak "$0: failed to open $input: $!\n"; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
my @news_items; |
184
|
|
|
|
|
|
|
my $position = 0; |
185
|
|
|
|
|
|
|
my $state = initial_state; # before first entry |
186
|
|
|
|
|
|
|
my ( $current ); |
187
|
|
|
|
|
|
|
$cat_priorities = {}; # priorities for sorting |
188
|
|
|
|
|
|
|
while ( ) { |
189
|
|
|
|
|
|
|
chop; |
190
|
|
|
|
|
|
|
/^\s*\#/ and next; # skip comments |
191
|
|
|
|
|
|
|
/^\s*$/ and next; # skip blank lines |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
if ( /^[^\s]/ ) { |
194
|
|
|
|
|
|
|
# found attribute line |
195
|
|
|
|
|
|
|
if ( $state == initial_state ) { |
196
|
|
|
|
|
|
|
if ( /^categories:\s*(.*)/ ) { |
197
|
|
|
|
|
|
|
my @cats = split ( /\s+/, $1 ); |
198
|
|
|
|
|
|
|
my ( $i ); |
199
|
|
|
|
|
|
|
$cat_priorities->{"default"} = 999; |
200
|
|
|
|
|
|
|
for ( $i=0; $i<=$#cats; $i++ ) { |
201
|
|
|
|
|
|
|
$cat_priorities->{$cats[$i]} |
202
|
|
|
|
|
|
|
= $i + 1; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
next; |
205
|
|
|
|
|
|
|
} elsif ( /^url-prefix:\s*(.*)/ ) { |
206
|
|
|
|
|
|
|
$self->{url_prefix} = $1; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
if ( $state == initial_state or $state == text_state ) |
210
|
|
|
|
|
|
|
{ |
211
|
|
|
|
|
|
|
# found first attribute of a new entry |
212
|
|
|
|
|
|
|
if ( /^([^=]+)=(.*)/ ) { |
213
|
|
|
|
|
|
|
$current = {}; |
214
|
|
|
|
|
|
|
$current->{position} = $position++; |
215
|
|
|
|
|
|
|
$current->{$1} = $2; |
216
|
|
|
|
|
|
|
push( @news_items, $current ); |
217
|
|
|
|
|
|
|
$state = attr_state; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} elsif ( $state == attr_state ) { |
220
|
|
|
|
|
|
|
# found a followup attribute |
221
|
|
|
|
|
|
|
if ( /^([^=]+)=(.*)/ ) { |
222
|
|
|
|
|
|
|
$current->{$1} = $2; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
} else { |
226
|
|
|
|
|
|
|
# found text line |
227
|
|
|
|
|
|
|
if ( $state == initial_state ) { |
228
|
|
|
|
|
|
|
# cannot accept text before any attributes |
229
|
|
|
|
|
|
|
next; |
230
|
|
|
|
|
|
|
} elsif ( $state == attr_state or $state == text_state ) { |
231
|
|
|
|
|
|
|
if ( defined $current->{text}) { |
232
|
|
|
|
|
|
|
$current->{text} .= "\n$_"; |
233
|
|
|
|
|
|
|
} else { |
234
|
|
|
|
|
|
|
$current->{text} = $_; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
$state = text_state; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# translate parsed news into the WebFetch Embedding API data table |
242
|
|
|
|
|
|
|
my ( $item, %label_hash, $pos ); |
243
|
|
|
|
|
|
|
$pos = 0; |
244
|
|
|
|
|
|
|
foreach $item ( @news_items ) { |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# generate an intra-page link label |
247
|
|
|
|
|
|
|
my ( $label, $count ); |
248
|
|
|
|
|
|
|
$count=0; |
249
|
|
|
|
|
|
|
while (( $label = $item->{posted}."-".sprintf("%03d",$count)), |
250
|
|
|
|
|
|
|
defined $label_hash{$label}) |
251
|
|
|
|
|
|
|
{ |
252
|
|
|
|
|
|
|
$count++; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
$label_hash{$label} = 1; |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# save the data record |
257
|
|
|
|
|
|
|
my $title = ( defined $item->{title}) ? $item->{title} : ""; |
258
|
|
|
|
|
|
|
my $posted = ( defined $item->{posted}) ? $item->{posted} : ""; |
259
|
|
|
|
|
|
|
my $category = ( defined $item->{category}) |
260
|
|
|
|
|
|
|
? $item->{category} : ""; |
261
|
|
|
|
|
|
|
my $text = ( defined $item->{text}) ? $item->{text} : ""; |
262
|
|
|
|
|
|
|
my $url_prefix = ( defined $self->{url_prefix}) |
263
|
|
|
|
|
|
|
? $self->{url_prefix} : ""; |
264
|
|
|
|
|
|
|
$self->data->add_record( |
265
|
|
|
|
|
|
|
printstamp($posted), $title, priority( $item ), |
266
|
|
|
|
|
|
|
expired( $item ), $pos, $label, |
267
|
|
|
|
|
|
|
$url_prefix."#".$label, $category, $text ); |
268
|
|
|
|
|
|
|
$pos++; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# |
273
|
|
|
|
|
|
|
# utility functions |
274
|
|
|
|
|
|
|
# |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# generate a printable version of the datestamp |
277
|
|
|
|
|
|
|
sub printstamp |
278
|
|
|
|
|
|
|
{ |
279
|
|
|
|
|
|
|
my ( $stamp ) = @_; |
280
|
|
|
|
|
|
|
my ( $year, $mon, $day ) = ( $stamp =~ /^(....)(..)(..)/ ); |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
return Month_to_Text(int($mon))." ".int($day).", $year"; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# function to detect if a news entry is expired |
286
|
|
|
|
|
|
|
sub expired |
287
|
|
|
|
|
|
|
{ |
288
|
|
|
|
|
|
|
my ( $entry ) = @_; |
289
|
|
|
|
|
|
|
return (( defined $entry->{expires}) and |
290
|
|
|
|
|
|
|
( $entry->{expires} lt $nowstamp )); |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# function to get the priority value from |
294
|
|
|
|
|
|
|
sub priority |
295
|
|
|
|
|
|
|
{ |
296
|
|
|
|
|
|
|
my ( $entry ) = @_; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
( defined $entry->{posted}) or return 999; |
299
|
|
|
|
|
|
|
my ( $year, $mon, $day ) = ( $entry->{posted} =~ /^(....)(..)(..)/ ); |
300
|
|
|
|
|
|
|
my $age = Delta_Days( $year, $mon, $day, @$now ); |
301
|
|
|
|
|
|
|
my $bonus = 0; |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
if ( $age <= 2 ) { |
304
|
|
|
|
|
|
|
$bonus -= 2 - $age; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
if (( defined $entry->{category}) and |
307
|
|
|
|
|
|
|
( defined $cat_priorities->{$entry->{category}})) |
308
|
|
|
|
|
|
|
{ |
309
|
|
|
|
|
|
|
my $cat_pri = ( exists $cat_priorities->{$entry->{category}}) |
310
|
|
|
|
|
|
|
? $cat_priorities->{$entry->{category}} : 0; |
311
|
|
|
|
|
|
|
return $cat_pri + $age * 0.025 + $bonus; |
312
|
|
|
|
|
|
|
} else { |
313
|
|
|
|
|
|
|
return $cat_priorities->{"default"} + $age * 0.025 |
314
|
|
|
|
|
|
|
+ $bonus; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
1; |
319
|
|
|
|
|
|
|
__END__ |