File Coverage

blib/lib/News/Search.pm
Criterion Covered Total %
statement 15 121 12.4
branch 0 52 0.0
condition 0 25 0.0
subroutine 5 13 38.4
pod 3 8 37.5
total 23 219 10.5


line stmt bran cond sub pod time code
1             package News::Search;
2              
3 1     1   282258 use warnings;
  1         2  
  1         172  
4 1     1   6 use strict;
  1         2  
  1         356  
5              
6             # @Author: Tong SUN, (c)2001-2008, all right reserved
7             # @Version: $Date: 2008/11/04 17:19:30 $ $Revision: 1.15 $
8             # @HomeURL: http://xpt.sourceforge.net/
9              
10             # {{{ LICENSE:
11              
12             #
13             # Permission to use, copy, modify, and distribute this software and its
14             # documentation for any purpose and without fee is hereby granted, provided
15             # that the above copyright notices appear in all copies and that both those
16             # copyright notices and this permission notice appear in supporting
17             # documentation, and that the names of author not be used in advertising or
18             # publicity pertaining to distribution of the software without specific,
19             # written prior permission. Tong Sun makes no representations about the
20             # suitability of this software for any purpose. It is provided "as is"
21             # without express or implied warranty.
22             #
23             # TONG SUN DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
24             # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL ADOBE
25             # SYSTEMS INCORPORATED AND DIGITAL EQUIPMENT CORPORATION BE LIABLE FOR ANY
26             # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
27             # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
28             # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
29             # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30             #
31              
32             # }}}
33              
34             # {{{ POD, Intro:
35              
36             =head1 NAME
37              
38             News::Search - Usenet news searching toolkit
39              
40             =head1 SYNOPSIS
41              
42             use News::Search;
43              
44             my $ns = News::Search->new();
45             $ns->search_for(\@ARGV);
46              
47             my %newsarticles = $ns->SearchNewsgroups;
48              
49             =head1 DESCRIPTION
50              
51             News::Search searches Usenet news postings.
52              
53             It can be used to search local news groups that google doesn't cover.
54             Or, even for news groups that are covered by google, it can give you
55             all the hits in one file, in the format that you prescribed.
56              
57             You can also use the provided L in cron to watch specific
58             news groups for specific criteria and mail you reports according to the
59             interval you set.
60              
61             =cut
62              
63             # }}}
64              
65             # {{{ Global Declaration:
66              
67             # ============================================================== &us ===
68             # ............................................................. Uses ...
69              
70             # -- global modules
71 1     1   6 use Carp;
  1         6  
  1         61  
72 1     1   5 use Net::NNTP;
  1         2  
  1         35  
73              
74 1     1   5 use base qw(Class::Accessor::Fast);
  1         2  
  1         2091  
75              
76             # ============================================================== &cs ===
77             # ................................................. Constant setting ...
78             #
79              
80             our @EXPORT = ( ); # may even omit this line
81             our $VERSION = sprintf("%d.%02d", q$Revision: 1.15 $ =~ /(\d+)\.(\d+)/);
82              
83             # }}}
84              
85             # ############################################################## &ss ###
86             # ................................................ Subroutions start ...
87              
88             =head1 METHODS
89              
90             =head2 News::Search->new(\%config_param)
91              
92             Initialize the object.
93              
94             my $searcher = News::Search->new();
95              
96             or,
97              
98             my $searcher = News::Search->new( {} );
99              
100             which are the same as:
101              
102             my $searcher = News::Search->new( {
103             nntp_server => 'news',
104             msg_headers => 'Date|From', # + Subject, which is always printed
105             msg_limit => 200,
106             verbose => 0,
107             on_group => \&default_group_handler,
108             on_message => \&default_message_handler,
109             } );
110              
111             What shown above are default settings. Any of the C<%config_param> attribute can be omitted when calling the new method.
112              
113             The C is the only class method. All the rest methods are object methods.
114              
115             =cut
116              
117             News::Search->mk_accessors(qw(nntp_server msg_headers msg_limit verbose
118             on_group on_message nntp_handle newsgroups nntp_query));
119              
120             my %config =
121             (
122             nntp_server => 'news',
123             msg_headers => 'Date|From', # + Subject, which is always printed
124             msg_limit => 200,
125             verbose => 0,
126             on_group => \&default_group_handler,
127             on_message => \&default_message_handler,
128             );
129              
130             my $verbose;
131              
132             sub new {
133 0     0 1   my ($class, $arg_ref) = @_;
134 0           my $self = $class->SUPER::new({%config, %$arg_ref});
135              
136 0           $verbose = $self->verbose;
137              
138 0           return $self;
139             }
140              
141             =head2 Object attributes
142              
143             The following object attributes are accessible.
144              
145             =over 4
146              
147             =item * nntp_server([set_val])
148              
149             The nntp server to search.
150              
151             =item * msg_headers([set_val])
152              
153             Message headers to print.
154              
155             =item * msg_limit([set_val])
156              
157             Maximum number of posts to search (not return).
158              
159             =item * verbose([set_val])
160              
161             Be verbose.
162              
163             =item * on_group([set_val])
164              
165             Handler for group starts. Refer to L for the example.
166              
167             =item * on_message([set_val])
168              
169             Handler for news message. Refer to L for the example.
170              
171             =back
172              
173             Provide the C to change the attribute, omitting it to retrieve the attribute value. E.g.,
174              
175             $searcher->nntp_server("news.easysw.com");
176              
177             =head2 Object method: search_for($array_ref)
178              
179             $searcher->search_for(\@ARGV);
180              
181             Command line parameter handling. Refer to L
182             section "command line arguments" for details.
183              
184             =cut
185              
186             sub search_for {
187 0     0 1   my ($self, $array_ref) = @_;
188              
189 0           my $nntp_server;
190 0           $nntp_server = $self->nntp_server;
191 0 0         $nntp_server = $ENV{"NNTPSERVER"} if $ENV{"NNTPSERVER"};
192              
193 0           my $nntp;
194 0 0 0       if (defined($ENV{DEBUG}) && $ENV{DEBUG} eq "1") {
195 0   0       $nntp = Net::NNTP->new($nntp_server, Debug=>'On', Timeout=>10) ||
196             croak "Cant connect to News Server: $@";
197             } else {
198 0   0       $nntp = Net::NNTP->new($nntp_server) ||
199             croak "Cant connect to News Server: $@";
200             }
201              
202 0           my @newsgroups;
203             my %args;
204              
205 0           foreach (@$array_ref) {
206 0 0         if (/=/) {
207             # key/value pair
208 0           my ($name, $value) = split(/=/);
209 0           $name = lc $name;
210 0           $args{$name} = $value;
211             } else {
212             # group name
213 0           my $ngname = $_;
214 0 0         if (index($ngname, "\*") > -1) {
215             # have wildcard (*) in group name.
216 0   0       my $nntplist = $nntp->list() || die "Cannot list newsgroups";
217 0           $ngname =~ s/\*/.*/g;
218 0           foreach (sort(keys(%$nntplist))) {
219 0 0         if (/$ngname/) {
220 0           push(@newsgroups, $_);
221             }
222             }
223             } else {
224 0           push(@newsgroups, $ngname);
225             }
226             }
227             }
228              
229 0 0         print STDERR "Searching the top ". $self->msg_limit. " messages "
230             . " in newsgroups: @newsgroups...\n\n"
231             if $verbose;
232              
233 0           $self->nntp_handle($nntp);
234 0           $self->newsgroups(\@newsgroups);
235 0           $self->nntp_query(\%args);
236              
237             }
238              
239             # default handler for group starts ...
240             sub default_group_handler {
241 0     0 0   my $newsgroup = shift;
242             #print STDERR "\n\nSearching group '$newsgroup'\n\n";
243             }
244              
245             # default handler for news message ...
246             sub default_message_handler {
247 0 0   0 0   print STDERR "." if $verbose;
248             }
249              
250             sub dbg_msg {
251 0     0 0   my $show_msg = shift;
252 0           my $show_level = shift;
253              
254 0 0         $show_level = 1 unless $show_level;
255 0 0         return unless $verbose >= $show_level;
256 0           warn "[News::Search] $show_msg\n";
257             }
258              
259             =head2 Object method: SearchNewsgroups()
260              
261             Search the given newsgroups with the given criteria:
262              
263             my %newsarticles = $ns->SearchNewsgroups;
264              
265             foreach my $article (values %newsarticles) {
266             # deal with $article->{"SUBJECT"}, @{$article->{"HEADER"}})
267             # and $article->{"BODY"}
268             }
269              
270             Refer to L for usage example.
271              
272             =cut
273              
274             sub SearchNewsgroups {
275 0     0 1   my $self = shift;
276 0           my ($newsgroups) = @_;
277 0 0         $newsgroups = $self->{newsgroups} unless $newsgroups;
278              
279 0           my $nntp = $self->{nntp_handle};
280 0           my $args = $self->{nntp_query};
281            
282 0           my %newsarticles;
283 0           foreach my $newsgroup (@$newsgroups) {
284 0           my ($first, $last) = ($nntp->group($newsgroup))[1,2];
285             #warn "] $first => $last\n";
286 0 0 0       if (($first == 0) && ($first == $last)) {
287 0           next;
288             }
289              
290 0 0         $first = $last - $self->msg_limit if $last - $self->msg_limit > $first;
291             #warn "] $first => $last\n" if $verbose;
292            
293             # == news article loop
294 0           $self->{on_group}->($newsgroup);
295 0           my $msg_headers = $self->msg_headers;
296 0   0       for ($nntp->nntpstat($first);$nntp->next() || last;) {
297 0           my $msghead = $nntp->head();
298              
299 0 0         unless(defined($msghead)){
300 0           dbg_msg "No message head found";
301 0           next;
302             }
303              
304             # Ignore html postings
305 0 0         if(arrary_search("Content-Type: text/html",$msghead)){
306 0           dbg_msg "html posting ignored (found in head)";
307 0           next;
308             }
309            
310 0           my ($msgfound, $msgsubj, $msgfrom, $newsarticle) =
311             SearchMessage($nntp, $msghead, $args);
312 0 0         next unless $msgfound;
313            
314 0           $self->{on_message}->($newsgroup, $msghead, $newsarticle);
315              
316             # Ignore html postings
317 0 0         if($newsarticle =~ "Content-Type: text/html"){
318 0           dbg_msg "html posting ignored (found in body)";
319 0           next;
320             }
321            
322             # zap excessive spaces
323 0           $newsarticle =~ s/\n(\s*\n){2,}/\n\n/;
324             # eliminate duplicated posts
325             #$newsarticles{"$msgfrom $msgsubj"} =
326 0           $newsarticles{"$msgfrom"} =
327             {
328             "SUBJECT" => $msgsubj,
329             "HEADER" => [ grep(/^($msg_headers): /, @$msghead) ],
330             #"BODY" => $newsarticle,
331             "BODY" => $newsarticle
332             };
333             }
334             }
335 0           $nntp->quit();
336 0           return %newsarticles;
337             }
338              
339              
340             # message search
341             sub SearchMessage($$$){
342 0     0 0   my ($nntp, $msghead, $args, ) = @_;
343 0           my $headmatched = my $bodymatched = 0;
344 0           my $msgfrom = "nofrom";
345 0           my $msgsubj = "nosubj";
346 0           my $i = 0;
347              
348             # -- message head loop
349             #warn "] @$msghead\n";
350 0           foreach my $headline (@$msghead) {
351 0           chomp($headline);
352 0           $headline =~ /^([^:]+): /;
353 0           my $argname = lc $1;
354 0           my $argval = "$'";
355 0 0         $msgfrom = $argval if ($argname eq 'from');
356 0 0         $msgsubj = $argval if ($argname eq 'subject');
357             # look for search patterns
358 0 0         if (defined($args->{$argname})) {
359 0           $i++;
360 0 0         if ($argval =~ m/$args->{$argname}/i) {
361             #warn "] <$args->{$argname}> $argname => $argval\n";
362 0           $headmatched = 1;
363             }
364             }
365             # look for ignore patterns
366 0 0         if (defined($args->{"no$argname"})) {
367 0 0         if ($argval =~ m/$args->{"no$argname"}/i) {
368 0           return (0, undef, undef, undef);
369             }
370             }
371             }
372 0           $msgsubj =~ s/^\w+: //; # remove re: fw:, etc
373             #warn "] headmatched = $i\n";
374              
375 0 0 0       if ($i == 0 && defined($args->{"body"})){
376             #warn "] search in the body only\n";
377 0           $headmatched = 1;
378             }
379              
380 0   0       my $msgbodyfh = $nntp->bodyfh() || Carp::shortmess
381             "Can't get body filehandle of article\n";
382              
383             # get the whole body
384 0           my $newsarticle = '';
385 0           while (my $bodyline=<$msgbodyfh>) {
386 0           $newsarticle .= $bodyline;
387             }
388             # Ignore html postings
389             #next if $newsarticle =~ m{^Content-Type: text/html|Mississauga|Scarborough|Etobicoke}mi;
390              
391 0 0         if (defined($args->{"body"})) {
392 0 0         if ($newsarticle =~ m/$args->{"body"}/i) {
393 0           $bodymatched = 1;
394             }
395             } else {
396             # not searching the body
397 0           $bodymatched = 1;
398             }
399              
400 0   0       return ($headmatched == 1 && $bodymatched == 1,
401             $msgsubj, $msgfrom, $newsarticle);
402             }
403              
404             sub arrary_search($$){
405 0     0 0   my ($look_for, $arrary_ref) = @_;
406 0           my $is_there = 0;
407 0           foreach my $elt (@$arrary_ref) {
408 0 0         if ($elt =~ /$look_for/) {
409 0           $is_there = 1;
410 0           last;
411             }
412             }
413 0           return $is_there;
414             }
415              
416             # {{{ POD, Appendixes:
417              
418             =head1 SEE ALSO
419              
420             L.
421              
422             =head1 BUGS
423              
424             Please report any bugs or feature requests to C, or through
425             the web interface at L. I will be notified, and then you'll
426             automatically be notified of progress on your bug as I make changes.
427              
428             =head1 SUPPORT
429              
430             You can find documentation for this module with the perldoc command.
431              
432             perldoc News::Search
433              
434              
435             You can also look for information at:
436              
437             =over 4
438              
439             =item * RT: CPAN's request tracker
440              
441             L
442              
443             =item * AnnoCPAN: Annotated CPAN documentation
444              
445             L
446              
447             =item * CPAN Ratings
448              
449             L
450              
451             =item * Search CPAN
452              
453             L
454              
455             =back
456              
457              
458             =head1 AUTHOR
459              
460             SUN, Tong C<< >>
461             http://xpt.sourceforge.net/
462              
463             =head1 COPYRIGHT
464              
465             Copyright 2003-2008 Tong Sun, all rights reserved.
466              
467             This program is released under the BSD license.
468              
469             =cut
470              
471             # }}}
472              
473             1; # End of News::Search