File Coverage

blib/lib/WebFetch/Input/SiteNews.pm
Criterion Covered Total %
statement 6 6 100.0
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 8 8 100.0


line stmt bran cond sub pod time code
1             #
2             # WebFetch::Input::SiteNews.pm - get headlines from a site-local file
3             #
4             # Copyright (c) 1998-2009 Ian Kluft. This program is free software; you can
5             # redistribute it and/or modify it under the terms of the GNU General Public
6             # License Version 3. See http://www.webfetch.org/GPLv3.txt
7              
8             package WebFetch::Input::SiteNews;
9              
10 1     1   1305 use strict;
  1         2  
  1         34  
11 1     1   5 use base "WebFetch";
  1         2  
  1         90  
12              
13             use Carp;
14             use Date::Calc qw(Today Delta_Days Month_to_Text);
15              
16             =head1 NAME
17              
18             WebFetch::Input::SiteNews - download and save SiteNews headlines
19              
20             =cut
21              
22             # set defaults
23             our ( $cat_priorities, $now, $nowstamp );
24              
25             our @Options = (
26             "short=s",
27             "long=s",
28             );
29             our $Usage = "--short short-output-file --long long-output-file";
30              
31             # configuration parameters
32             our $num_links = 5;
33              
34             # no user-servicable parts beyond this point
35              
36             # register capabilities with WebFetch
37             __PACKAGE__->module_register( "cmdline", "input:sitenews" );
38              
39             =head1 SYNOPSIS
40              
41             In perl scripts:
42              
43             C
44              
45             From the command line:
46              
47             C
48             --source news-file --short short-form-output-file
49             --long long-form-output-file>
50              
51             =head1 DESCRIPTION
52              
53             This module gets the current headlines from a site-local file.
54              
55             The I<--source> parameter specifies a file name which contains news to be
56             posted. See L<"FILE FORMAT"> below for details on contents to put in the
57             file. I<--source> may be specified more than once, allowing a single news
58             output to come from more than one input. For example, one file could be
59             manually maintained in CVS or RCS and another could be entered from a
60             web form.
61              
62             After this runs, the file C will be created or replaced.
63             If there already was a C file, it will be moved to
64             C.
65              
66             =cut
67              
68             # constants for state names
69             sub initial_state { 0; }
70             sub attr_state { 1; }
71             sub text_state { 2; }
72              
73             sub fetch
74             {
75             my ( $self ) = @_;
76              
77             # set parameters for WebFetch routines
78             if ( !defined $self->{num_links}) {
79             $self->{num_links} = $WebFetch::Input::SiteNews::num_links;
80             }
81             if ( !defined $self->{style}) {
82             $self->{style} = {};
83             $self->{style}{para} = 1;
84             }
85              
86             # set up Webfetch Embedding API data
87             $self->{actions} = {};
88             $self->data->add_fields( "date", "title", "priority", "expired",
89             "position", "label", "url", "category", "text" );
90             # defined which fields match to which "well-known field names"
91             $self->data->add_wk_names(
92             "title" => "title",
93             "url" => "url",
94             "date" => "date",
95             "summary" => "text",
96             "category" => "category"
97             );
98              
99             # process the links
100              
101             # get local time for various date comparisons
102             $now = [ Today ];
103             $nowstamp = sprintf "%04d%02d%02d", @$now;
104              
105             # parse data file
106             my $source;
107             if (( exists $self->{sources}) and ( ref $self->{sources} eq "ARRAY" )) {
108             foreach $source ( @{$self->{sources}}) {
109             $self->parse_input( $source );
110             }
111             }
112              
113             # set parameters for the short news format
114             if ( defined $self->{short_path} ) {
115             # create the HTML actions list
116             $self->{actions}{html} = [];
117              
118             # create the HTML-generation parameters
119             my $params = {};
120             $params = {};
121             $params->{sort_func} = sub {
122             my ( $a, $b ) = @_;
123              
124             # sort/compare news entries for the short display
125             # sorting priority:
126             # expiration status first (expired items last)
127             # priority second (category/age combo)
128             # label third (chronological order)
129              
130             # check expirations first
131             my $exp_fnum = $self->fname2fnum("expired");
132             ( $a->[$exp_fnum] and !$b->[$exp_fnum]) and return 1;
133             ( !$a->[$exp_fnum] and $b->[$exp_fnum]) and return -1;
134              
135             # compare priority - posting category w/ age penalty
136             my $pri_fnum = $self->fname2fnum("priority");
137             if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) {
138             return $a->[$pri_fnum] <=> $b->[$pri_fnum];
139             }
140              
141             # otherwise sort by label (chronological order)
142             my $lbl_fnum = $self->fname2fnum("label");
143             return $a->[$lbl_fnum] cmp $b->[$lbl_fnum];
144             };
145             $params->{filter_func} = sub {
146             # filter: skip expired items
147             my $exp_fnum = $self->fname2fnum("expired");
148             return ! $_[$exp_fnum];
149             };
150             $params->{format_func} = sub {
151             # generate HTML text
152             my $txt_fnum = $self->fname2fnum("text");
153             my $pri_fnum = $self->fname2fnum("priority");
154             return $_[$txt_fnum]
155             ."\n";
156             };
157              
158             # put parameters for fmt_handler_html() on the html list
159             push @{$self->{actions}{html}}, [ $self->{short_path}, $params ];
160             }
161              
162             # set parameters for the long news format
163             if ( defined $self->{long_path} ) {
164             # create the SiteNews-specific action list
165             # It will use WebFetch::Input::SiteNews::fmt_handler_sitenews_long()
166             # which is defined in this file
167             $self->{actions}{sitenews_long} = [];
168              
169             # put parameters for fmt_handler_sitenews_long() on the list
170             push @{$self->{actions}{sitenews_long}}, [ $self->{long_path} ];
171             }
172             }
173              
174             # parse input file
175             sub parse_input
176             {
177             my ( $self, $input ) = @_;
178              
179             # parse data file
180             if ( ! open ( news_data, $input )) {
181             croak "$0: failed to open $input: $!\n";
182             }
183             my @news_items;
184             my $position = 0;
185             my $state = initial_state; # before first entry
186             my ( $current );
187             $cat_priorities = {}; # priorities for sorting
188             while ( ) {
189             chop;
190             /^\s*\#/ and next; # skip comments
191             /^\s*$/ and next; # skip blank lines
192              
193             if ( /^[^\s]/ ) {
194             # found attribute line
195             if ( $state == initial_state ) {
196             if ( /^categories:\s*(.*)/ ) {
197             my @cats = split ( /\s+/, $1 );
198             my ( $i );
199             $cat_priorities->{"default"} = 999;
200             for ( $i=0; $i<=$#cats; $i++ ) {
201             $cat_priorities->{$cats[$i]}
202             = $i + 1;
203             }
204             next;
205             } elsif ( /^url-prefix:\s*(.*)/ ) {
206             $self->{url_prefix} = $1;
207             }
208             }
209             if ( $state == initial_state or $state == text_state )
210             {
211             # found first attribute of a new entry
212             if ( /^([^=]+)=(.*)/ ) {
213             $current = {};
214             $current->{position} = $position++;
215             $current->{$1} = $2;
216             push( @news_items, $current );
217             $state = attr_state;
218             }
219             } elsif ( $state == attr_state ) {
220             # found a followup attribute
221             if ( /^([^=]+)=(.*)/ ) {
222             $current->{$1} = $2;
223             }
224             }
225             } else {
226             # found text line
227             if ( $state == initial_state ) {
228             # cannot accept text before any attributes
229             next;
230             } elsif ( $state == attr_state or $state == text_state ) {
231             if ( defined $current->{text}) {
232             $current->{text} .= "\n$_";
233             } else {
234             $current->{text} = $_;
235             }
236             $state = text_state;
237             }
238             }
239             }
240              
241             # translate parsed news into the WebFetch Embedding API data table
242             my ( $item, %label_hash, $pos );
243             $pos = 0;
244             foreach $item ( @news_items ) {
245              
246             # generate an intra-page link label
247             my ( $label, $count );
248             $count=0;
249             while (( $label = $item->{posted}."-".sprintf("%03d",$count)),
250             defined $label_hash{$label})
251             {
252             $count++;
253             }
254             $label_hash{$label} = 1;
255              
256             # save the data record
257             my $title = ( defined $item->{title}) ? $item->{title} : "";
258             my $posted = ( defined $item->{posted}) ? $item->{posted} : "";
259             my $category = ( defined $item->{category})
260             ? $item->{category} : "";
261             my $text = ( defined $item->{text}) ? $item->{text} : "";
262             my $url_prefix = ( defined $self->{url_prefix})
263             ? $self->{url_prefix} : "";
264             $self->data->add_record(
265             printstamp($posted), $title, priority( $item ),
266             expired( $item ), $pos, $label,
267             $url_prefix."#".$label, $category, $text );
268             $pos++;
269             }
270             }
271              
272             #
273             # utility functions
274             #
275              
276             # generate a printable version of the datestamp
277             sub printstamp
278             {
279             my ( $stamp ) = @_;
280             my ( $year, $mon, $day ) = ( $stamp =~ /^(....)(..)(..)/ );
281              
282             return Month_to_Text(int($mon))." ".int($day).", $year";
283             }
284              
285             # function to detect if a news entry is expired
286             sub expired
287             {
288             my ( $entry ) = @_;
289             return (( defined $entry->{expires}) and
290             ( $entry->{expires} lt $nowstamp ));
291             }
292              
293             # function to get the priority value from
294             sub priority
295             {
296             my ( $entry ) = @_;
297              
298             ( defined $entry->{posted}) or return 999;
299             my ( $year, $mon, $day ) = ( $entry->{posted} =~ /^(....)(..)(..)/ );
300             my $age = Delta_Days( $year, $mon, $day, @$now );
301             my $bonus = 0;
302              
303             if ( $age <= 2 ) {
304             $bonus -= 2 - $age;
305             }
306             if (( defined $entry->{category}) and
307             ( defined $cat_priorities->{$entry->{category}}))
308             {
309             my $cat_pri = ( exists $cat_priorities->{$entry->{category}})
310             ? $cat_priorities->{$entry->{category}} : 0;
311             return $cat_pri + $age * 0.025 + $bonus;
312             } else {
313             return $cat_priorities->{"default"} + $age * 0.025
314             + $bonus;
315             }
316             }
317              
318             1;
319             __END__