File Coverage

blib/lib/News/Overview.pm
Criterion Covered Total %
statement 15 157 9.5
branch 0 80 0.0
condition 0 32 0.0
subroutine 5 24 20.8
pod 16 16 100.0
total 36 309 11.6


line stmt bran cond sub pod time code
1             $VERSION = "0.12";
2             package News::Overview;
3             our $VERSION = "0.12";
4              
5             # -*- Perl -*- # Fri Oct 10 11:29:51 CDT 2003
6             #############################################################################
7             # Written by Tim Skirvin . Copyright 2003, Tim
8             # Skirvin. Redistribution terms are below.
9             #############################################################################
10              
11             =head1 NAME
12              
13             News::Overview - an object to store condensed information about Usenet posts
14              
15             =head1 SYNOPSIS
16              
17             use News::Overview;
18             use Net::NNTP;
19            
20             my $overview = News::Overview->new();
21             my $nntp = new Net::NNTP;
22              
23             $nntp->group("killfile.test");
24             $overview->add_from_nntp($nntp->xover);
25              
26             foreach my $entry ( $overview->sort ('thread', $overview->entries) ) {
27             print $overview->print_entry($entry), "\n";
28             }
29              
30             =head1 DESCRIPTION
31              
32             News::Overview objects store combined information about many messages, as
33             generally done in INN's overview format and semi-codified in the XOVER
34             extentions to RFC1036. Each object is meant to store a single
35             newsgroup's worth of basic header information - by default the message
36             number, subject, poster, date of posting, message identifier, references
37             to the article's parents, size of the body, number of lines in the body,
38             and information on where this message is stored within the server.
39             This information is then used to offer summartes of messages in the group,
40             sort the messages, and so forth.
41              
42             The main unit of storage within News::Overview is the object
43             News::Overview::Entry; each one of these contains information on a single
44             article. News::Overview itself is dedicated to creating, storing, and
45             manipulating these Entries.
46              
47             =head1 USAGE
48              
49             All of this object's usage is contained within its functions.
50              
51             =cut
52              
53             ###############################################################################
54             ### main() ####################################################################
55             ###############################################################################
56              
57 1     1   706 use strict;
  1         2  
  1         32  
58 1     1   1134 use News::Article;
  1         24000  
  1         54  
59 1     1   739 use News::Overview::Entry;
  1         4  
  1         38  
60 1     1   1192 use Net::NNTP::Functions;
  1         462  
  1         49  
61              
62 1     1   5 use vars qw( @DEFAULT );
  1         2  
  1         1962  
63             @DEFAULT = qw( Subject: From: Date: Message-ID: References:
64             Bytes: Lines: Xref:full );
65              
66             =head2 Basic Functions
67              
68             =over 4
69              
70             =item new ( [ DEFAULT_ARRAY_REF ] )
71              
72             Creates a new News::Overview object.
73              
74             If C is offered, we will use this to define which
75             fields are stored in all the associated Entries; otherwise, we default to
76             the fields in C<@News::Overview::DEFAULT>. The 'Number:' field is added
77             as well, to store the "article number" that each entry is associated with.
78              
79             Returns the new blessed object, or undef if unsuccessful.
80              
81             =cut
82              
83             sub new {
84 0     0 1   my ($proto, $default) = @_;
85 0   0       $default ||= "";
86 0   0       my $class = ref($proto) || $proto;
87 0           my $self = {
88 0 0         Defaults => [ ref $default ? @{$default} : @DEFAULT ],
89             # Count => 0, # Number of articles currently in here
90             Articles => {}, # Actual article information
91             Article_By_ID => {}, # Actual article information
92             };
93 0           unshift @{$$self{Defaults}}, "Number:";
  0            
94 0           $$self{'Fields'} = [ _fields($$self{'Defaults'}) ];
95 0           bless $self, $class;
96 0           $self;
97             }
98              
99             =item default ()
100              
101             In array context, returns the full list of default information associated
102             with each Entry. In scalar context, returns the same as an arrayref.
103              
104             =item defaults ( )
105              
106             Same as default(), except this information is instead based on
107             @News::Overview::DEFAULT (ie doesn't include Number:).
108              
109             =item fields ()
110              
111             In array context, returns the list of fields stored in each associated
112             Entry. In scalar context, returns this as an arrayref.
113              
114             This differs from default() only in as much as everything after the ':' is
115             trimmed; these are meant to be used as
116              
117             =cut
118              
119 0 0   0 1   sub default { wantarray ? @{shift->{'Defaults'}} : shift->{'Defaults'} ; }
  0            
120 0     0 1   sub defaults { _fields(@DEFAULT) }
121 0 0   0 1   sub fields { wantarray ? @{shift->{'Fields'}} : shift->{'Fields'} ; }
  0            
122              
123             # =item value ( KEY [, VALUE ])
124             #
125             # Returns the ...hey, wait a second, we're not doing anything with this!
126             #
127             # =cut
128             #
129             # sub value {
130             # my ($self, $key, $value) = @_;
131             # return undef unless $key;
132             # $self->values->{$key} = $value if defined $value;
133             # $self->values->{$key};
134             # }
135             #
136             # =item values ( )
137             #
138             # =cut
139             #
140             # sub values { shift->{'Values'} }
141            
142             =item entries ()
143              
144             Returns the (unsorted) array of News::Overview::Entry objects within the
145             object.
146              
147             =cut
148              
149 0     0 1   sub entries { values %{shift->{Articles}} }
  0            
150              
151             =item count ()
152              
153             Returns the number of News::Overview::Entry objects associated with this
154             object.
155              
156             =cut
157              
158 0 0   0 1   sub count { scalar values %{shift->{Articles}} || 0 }
  0            
159              
160             =back
161              
162             =head2 Adding Entries
163              
164             These functions add new News::Overview::Entry items to the object, as
165             parsed from several sources.
166              
167             =over 4
168              
169             =item insert_entry ( NUMBER, INFOARRAY )
170              
171             Actually does the work of inserting an Entry into the object. C
172             is the article number, which is used as they key for this Entry;
173             C is the list of information necessary for each Entry, sorted
174             by whatever function called this one.
175              
176             Returns undef if there's already an entry matching the given C,
177             otherwise returns the new entry.
178              
179             =cut
180              
181             sub insert_entry {
182 0     0 1   my ($self, @info) = @_;
183 0           my %hash;
184 0   0       foreach ($self->fields) { $hash{$_} = shift @info || ""; }
  0            
185 0           my $msgid = $hash{'Message-ID'};
186              
187             # Don't do anything more if there's already an entry for this
188 0 0         return undef if $self->{'Articles_By_ID'}->{$msgid};
189              
190 0   0       my @refs = split(/\s+/, $hash{'References'} || "");
191 0           my $item = new News::Overview::Entry($msgid, \@refs, %hash);
192 0           foreach (@refs) {
193 0   0       my $artbyid = $self->{'Article_By_ID'}->{$_} || undef;
194 0 0         if ($artbyid) { $artbyid->add_child($item) }
  0            
195             }
196 0           $self->{'Article_By_ID'}->{$msgid} = $item;
197              
198 0           my $number = $hash{'Number'}; # Ought to abort if we don't have this
199 0           $self->{'Articles'}->{$number} = $item;
200 0           $item;
201             }
202              
203             =item add_xover ( LINES )
204              
205             Reads in raw xover C (such as those created by print()) and creates
206             entries for each, using insert_entry(). Returns the number of Entries
207             that were succesfully added.
208              
209             =cut
210              
211             sub add_xover {
212 0     0 1   my ($self, @lines) = @_;
213 0           my $count = 0;
214 0           foreach my $line (@lines) {
215 0           chomp; my ($art, @info) = split(/\t/, $line);
  0            
216 0 0         $self->insert_entry( $art, @info ) and $count++
217             }
218 0           $$self{Count} += $count; $count;
  0            
219             }
220              
221             =item add_from_nntp ( LINEHASH )
222              
223             Reads in the information returned by Net::NNTP's xover() function, and and
224             creates entries for each, using insert_entry(). Returns the number of
225             Entries that were succesfully added.
226              
227             =cut
228              
229             sub add_from_nntp {
230 0     0 1   my ($self, %lines) = @_;
231 0           my $count = 0;
232 0           foreach my $art (keys %lines) {
233 0 0         next unless ref $lines{$art};
234 0           my @info = @{$lines{$art}};
  0            
235 0 0         $self->insert_entry( $art, @info ) and $count++
236             }
237              
238 0           $$self{Count} += $count; $count;
  0            
239             }
240              
241             =item add_from_article ( NUMBER, ARTICLE )
242              
243             Takes C
, a News::Article object, and generates the necessary
244             information to populate an Entry from it. C is the key that will
245             be associated with the article; we need it separately because we can't
246             really get it from the article directly. Returns 1 if successful, 0 if
247             not (roughly the same as add_xover() and add_from_nntp()).
248              
249             =cut
250              
251             sub add_from_article {
252 0     0 1   my ($self, $num, $article) = @_;
253 0 0 0       return undef unless ($num && $article && ref $article);
      0        
254              
255 0           my @info;
256 0 0         my @defaults = ref $self ? $self->default : @DEFAULT;
257 0           foreach my $field (@defaults) {
258 0           $field =~ s/:.*//;
259 0 0         next if $field eq 'Number';
260             # next unless $field;
261 0 0         if (lc $field eq 'lines') { push @info, $article->lines; }
  0 0          
    0          
262 0           elsif (lc $field eq 'bytes') { push @info, $article->bytes; }
263             elsif ($article->header($field)) {
264 0           push @info, $article->header($field);
265 0           } else { push @info, '' }
266             }
267 0 0         $self->insert_entry( $num, @info ) ? 1 : 0;
268             }
269              
270              
271             =head2 Sorting Functions
272              
273             These functions are used to sort the Entries within the News::Overview
274             object.
275              
276             =over 4
277              
278             =item sort ( SORTTYPE, ENTRIES )
279              
280             Sort array C based on C. Possible sorting types (case
281             insensitive):
282            
283             thread Uses thread() to sort the messages
284             date Sort (numerically) by the article time
285             time Sort (numerically) by the article time
286             lines Sort (numerically) by lines, then by time
287             (other) Sort (with 'cmp') based on the value of the specified
288             field, ie sort by 'From' or 'Subject', then by time
289            
290             If C is prefixed with a '-', then we will return the entries in
291             revere order.
292              
293             Returns the sorted array.
294              
295             =cut
296              
297             sub sort {
298 0   0 0 1   my ($self, $sort, @entries) = @_; $sort ||= "";
  0            
299 0           my ($reverse, $type) = $sort =~ m/^(\-?)(.*)$/;
300 0   0       $type ||= 'Number';
301 0           my @return;
302 0 0 0       if (lc $type eq 'thread') { # thread them
    0          
    0          
    0          
303 0           @return = $self->thread(@entries);
304             } elsif (lc $type eq 'lines') {
305 0 0         @return = sort {
306 0           ( $a->values->{ucfirst lc $type} <=> $b->values->{ucfirst lc $type} )
307             || ( $a->time <=> $b->time ) } @entries;
308 0           } elsif ( lc $type eq 'date' || lc $type eq 'time' ) {
309 0           @return = sort { ( $a->time <=> $b->time ) } @entries;
  0            
310             } elsif ( grep { lc $_ eq lc $type } $self->fields ) {
311 0 0         @return = sort {
312 0           ( $a->values->{ucfirst lc $type} cmp $b->values->{ucfirst lc $type} )
313             || ( $a->time <=> $b->time ) } @entries;
314             } else {
315 0           @return = sort { $a->values->{Number} <=> $b->values->{Number} } @entries;
  0            
316             }
317 0 0         $reverse ? reverse @return : @return;
318             }
319              
320             =item thread ( ENTRIES )
321              
322             Sort C by thread - that is, with articles that directly follow up
323             to a given article following the first article. The general algorithm:
324              
325             Sort ENTRIES by depth and time of posting.
326             For each entry, return the entry and its sorted children.
327             No article is returned twice.
328              
329             This doesn't quite work the way you'd expect it to; if the original parent
330             isn't there, any number of children may appear elsewhere, because there
331             was no common parent C to hold things together. The only solution
332             I can see is to look at parents as well, sorting them but not printing
333             them, which isn't currently being done; I may do this in a future version
334             of this package.
335              
336             This function is fairly computationally intensive. It might be nice to
337             cache this information somehow in some applications; I suspect that this
338             would be a job for a different module, however. There's probably also
339             some computational cruft that I haven't looked for yet.
340              
341             =cut
342              
343             sub thread {
344 0     0 1   my ($self, @entries) = @_;
345              
346 0           my %added;
347              
348             my @return;
349 0           foreach my $ent ( sort News::Overview::_bythread @entries ) {
350 0 0         next unless ref $ent;
351 0           my $parent = $ent->id;
352 0 0         push @return, $ent unless ($added{$ent->id}++);
353 0           my @children = $ent->children; my @tosort;
  0            
354 0 0         foreach (@children) { push @tosort, $_ unless $added{$_->id} }
  0            
355 0 0         next unless @tosort;
356 0           foreach my $item ( $self->thread(@tosort) ) {
357 0 0         next unless ref $item;
358             # my $item = $$self{Article_By_ID}->{$_};
359 0 0         push @return, $item unless ($added{$item->id}++);
360             }
361             }
362            
363 0           @return;
364             }
365              
366              
367             =back
368              
369             =head2 NNTP Functions
370              
371             These functions perform functions similar to those requested by Net::NNTP,
372             and are therefore useful for creating modules dedicated to getting this
373             information in other ways.
374              
375             =over 4
376              
377             =item overview_fmt ()
378              
379             Returns an array reference to the field names, in order, that are stored
380             in the Entries.
381              
382             =cut
383              
384 0 0   0 1   sub overview_fmt { my ($self) = @_; ref $self ? $self->default : \@DEFAULT; }
  0            
385              
386             =item xover ( MESSAGESPEC [, FIELDS ] )
387              
388             Returns a hash reference where the keys are the message numbers and the
389             values are array references containing the overview fields for that
390             message. C is parsed with B's
391             messagespec() function to decide wich articles to get; C is an
392             array of fields to retrieve, which (if not offered) will default to the
393             value of fields().
394              
395             We aren't currently dealing with the response if C is a
396             message-ID (or empty); we're assuming that it's just numbers. This is
397             wrong.
398              
399             =cut
400              
401             sub xover {
402 0     0 1   my ($self, $spec, @fields) = @_;
403 0 0         @fields = $self->fields unless scalar @fields;
404 0           my ($first, $last) = messagespec($spec);
405             # my ($first, $last) = split('-', $spec);
406 0   0       $first ||= 0;
407 0           my %entries;
408 0           foreach my $key (keys %{$self->{Articles}}) {
  0            
409 0 0         next if $key < $first;
410 0 0 0       next unless ($last > 0 && $key <= $last);
411              
412 0           my $entry = $$self{Articles}->{$key};
413             # Should be able to get the article by ID too
414 0 0         next unless $entry;
415              
416 0           my @over;
417 0           foreach (@fields) {
418 0 0         next if $_ eq 'Number'; # Skip the 'Number' field
419 0           push @over, $entry->values->{$_}
420             }
421              
422 0           $entries{$key} = \@over;
423             }
424 0           \%entries;
425             }
426              
427             =back
428              
429             =head2 Printing Functions
430              
431             These functions offer printable versions of the overview information,
432             which can be used for long-term storage.
433              
434             =over 4
435              
436             =item print ( SORT [, FIELDS] )
437              
438             Makes a printable version of all of the Entries in the object. Sorts the
439             entries based on C; C describes which fields to output;
440             defaults to fields(). The saved fields are separated with tabs, with all
441             other whitespace trimmed. This is suitable for saving out to a file and
442             later reading back in with add_xover().
443              
444             Returns an array of lines of text containing the information in array
445             context, or in scalar context returns these lines joined with newlines.
446              
447             =cut
448              
449             sub print {
450 0     0 1   my ($self, $sort, @fields) = @_;
451 0 0         @fields = $self->fields unless scalar @fields;
452 0           my @return;
453 0           my @entries = $self->sort($sort, values %{$self->{Articles}});
  0            
454 0           foreach my $art (@entries) {
455 0           push @return, $self->print_entry($art, @fields);
456             }
457 0 0         wantarray ? @return : join("\n", @return);
458             }
459              
460             =item print_entry ( ENTRY )
461              
462             Print a specific entry's worth of information, as described above.
463              
464             =cut
465              
466             sub print_entry {
467 0     0 1   my ($self, $entry, @fields) = @_;
468 0 0 0       return "" unless ($entry && ref $entry);
469 0 0         @fields = $self->fields unless scalar @fields;
470 0           my @over;
471 0           foreach (@fields) { push @over, $entry->value($_) }
  0            
472 0           map { s/\s/ /g; } @over; # Trim all whitespace
  0            
473 0           join("\t", @over);
474             }
475              
476             =back
477              
478             =cut
479              
480             ###############################################################################
481             ### Internal Functions ########################################################
482             ###############################################################################
483              
484             ### _fields ( ${@heads} )
485             # Retuns canonical names for the fields header. Takes an arrayref,
486             # returns an array.
487             sub _fields {
488 0     0     my $heads = shift;
489 0           my @heads = @{$heads};
  0            
490 0           map { s/:.*//g; lc $_ } @heads;
  0            
  0            
491 0           @heads;
492             }
493              
494             ### _bythread ( $a, $b )
495             # Sort function, to do sorts by thread - this means depth first, then
496             # number. The actual "do children first" part is in thread().
497             sub _bythread {
498 0 0   0     $a->depth <=> $b->depth
499             ||
500             $a->time <=> $b->time
501             # $a->values->{Number} <=> $b->values->{Number}
502             }
503              
504             ### _bythread_basic ( $a, $b )
505             # More basic, depth-only search.
506 0     0     sub _bythread_basic { $a->depth <=> $b->depth }
507              
508             =head1 REQUIREMENTS
509              
510             News::Overview::Entry, News::Article, Net::NNTP::Functions
511              
512             =head1 SEE ALSO
513              
514             B, B, B
515              
516             =head1 NOTES
517              
518             This was originally designed to be used with News::Archive and kiboze.pl;
519             it eventually got dragged into News::Web as well, and so it became worth
520             making into a separate function. It also didn't quite fit into my newslib
521             project, since it might be worth optimizing this specifically in the
522             future. Aah, well.
523              
524             =head1 TODO
525              
526             We should build xhdr(), xpat(), and other Net::NNTP functions into here,
527             just like xover() and overview_fmt().
528              
529             It would be nice if there was a way to say "return 500 entries" in an
530             xover-type context, instead of "return 1 through 500"; sadly, since
531             Net::NNTP->xover() doesn't have this, so I'll have to work out some other
532             way to implement it.
533              
534             We should be able to limit what we're returning in some more logical
535             manner, ie with an SQL-type select() function - "return all entries posted
536             between x and y dates", or "return all entries posted by user z", or
537             whatever.
538              
539             =head1 AUTHOR
540              
541             Tim Skirvin
542              
543             =head1 COPYRIGHT
544              
545             Copyright 2003 by Tim Skirvin . This code may be
546             distributed under the same terms as Perl itself.
547              
548             =cut
549              
550             1;
551              
552             ###############################################################################
553             ### Version History ###########################################################
554             ###############################################################################
555             # v0.01b Fri Oct 10 11:32:39 CDT 2003
556             ### First commented version (above date indicates the start of the comments)
557             # v0.10b Fri Oct 10 15:25:43 CDT 2003
558             ### Took out some unnecessary code where necessary. Made a print_entry()
559             ### function.
560             # v0.11b Fri Oct 10 15:36:45 CDT 2003
561             ### Very minor documentation changes
562             # v0.12 Thu Apr 22 13:19:25 CDT 2004
563             ### No real changes; internal code layout.