| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::Search::AOL; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 4 |  |  | 4 |  | 1241091 | use warnings; | 
|  | 4 |  |  |  |  | 10 |  | 
|  | 4 |  |  |  |  | 136 |  | 
| 4 | 4 |  |  | 4 |  | 21 | use strict; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 77 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 4 |  |  | 4 |  | 97 | use 5.008; | 
|  | 4 |  |  |  |  | 17 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | require WWW::Search; | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 4 |  |  | 4 |  | 3364 | use WWW::SearchResult; | 
|  | 4 |  |  |  |  | 225734 |  | 
|  | 4 |  |  |  |  | 122 |  | 
| 11 | 4 |  |  | 4 |  | 1791 | use Encode; | 
|  | 4 |  |  |  |  | 23141 |  | 
|  | 4 |  |  |  |  | 355 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 4 |  |  | 4 |  | 24 | use Scalar::Util (); | 
|  | 4 |  |  |  |  | 9 |  | 
|  | 4 |  |  |  |  | 149 |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | =head1 NAME | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | WWW::Search::AOL - backend for searching search.aol.com | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | =head1 NOTE | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | This module currently does not work. I'll fix it if there's interest to | 
| 22 |  |  |  |  |  |  | fix it. | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | =head1 VERSION | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | Version 0.0107 | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | =cut | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | our $VERSION = '0.0107'; | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 4 |  |  | 4 |  | 20 | use vars qw(@ISA); | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 3705 |  | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | @ISA=(qw(WWW::Search)); | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | This module provides a backend of L to search using | 
| 39 |  |  |  |  |  |  | L. | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | use WWW::Search; | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | my $oSearch = WWW::Search->new("AOL"); | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | =head1 FUNCTIONS | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | All of these functions are internal to the module and are of no concern | 
| 48 |  |  |  |  |  |  | of the user. | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | =head2 native_setup_search() | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | This function sets up the search. | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | =cut | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | sub native_setup_search | 
| 57 |  |  |  |  |  |  | { | 
| 58 | 1 |  |  | 1 | 1 | 875 | my ($self, $native_query, $opts) = @_; | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 1 |  |  |  |  | 2 | $self->{'_hits_per_page'} = 10; | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 1 |  |  |  |  | 9 | $self->user_agent('non-robot'); | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 1 |  |  |  |  | 18898 | $self->{'_next_to_retrieve'} = 1; | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 1 |  | 50 |  |  | 21 | $self->{'search_base_url'} ||= 'http://search.aol.com'; | 
| 67 | 1 |  | 50 |  |  | 11 | $self->{'search_base_path'} ||= '/aolcom/search'; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 1 | 50 |  |  |  | 10 | if (!defined($self->{'_options'})) | 
| 70 |  |  |  |  |  |  | { | 
| 71 | 1 |  |  |  |  | 9 | $self->{'_options'} = +{ | 
| 72 |  |  |  |  |  |  | 'query' => $native_query, | 
| 73 |  |  |  |  |  |  | 'invocationType' => 'topsearchbox.webhome', | 
| 74 |  |  |  |  |  |  | }; | 
| 75 |  |  |  |  |  |  | } | 
| 76 | 1 |  |  |  |  | 5 | my $self_options = $self->{'_options'}; | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 1 | 50 |  |  |  | 8 | if (defined($opts)) | 
| 79 |  |  |  |  |  |  | { | 
| 80 | 1 |  |  |  |  | 6 | foreach my $k (keys %$opts) | 
| 81 |  |  |  |  |  |  | { | 
| 82 | 1 | 50 |  |  |  | 8 | if (WWW::Search::generic_option($k)) | 
| 83 |  |  |  |  |  |  | { | 
| 84 | 1 | 50 |  |  |  | 18 | if (defined($opts->{$k})) | 
| 85 |  |  |  |  |  |  | { | 
| 86 | 1 |  |  |  |  | 9 | $self->{$k} = $opts->{$k}; | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | else | 
| 90 |  |  |  |  |  |  | { | 
| 91 | 0 | 0 |  |  |  | 0 | if (defined($opts->{$k})) | 
| 92 |  |  |  |  |  |  | { | 
| 93 | 0 |  |  |  |  | 0 | $self_options->{$k} = $opts->{$k}; | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  |  | 
| 99 | 1 |  |  |  |  | 18 | $self->{'_next_url'} = $self->{'search_base_url'} . $self->{'search_base_path'} . '?' . $self->hash_to_cgi_string($self_options); | 
| 100 | 1 |  |  |  |  | 58 | $self->{'_AOL_first_retrieve_call'} = 1; | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | =head2 parse_tree() | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | This function parses the tree and fetches the results. | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | =cut | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | sub _no_hits | 
| 110 |  |  |  |  |  |  | { | 
| 111 | 1 |  |  | 1 |  | 3 | my $self = shift; | 
| 112 |  |  |  |  |  |  |  | 
| 113 | 1 |  |  |  |  | 10 | $self->approximate_result_count(0); | 
| 114 | 1 |  |  |  |  | 13 | $self->{'_AOL_no_results_found'} = 1; | 
| 115 | 1 |  |  |  |  | 5 | return 0; | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | sub parse_tree | 
| 119 |  |  |  |  |  |  | { | 
| 120 | 1 |  |  | 1 | 1 | 2125665 | my ($self, $tree) = @_; | 
| 121 |  |  |  |  |  |  |  | 
| 122 | 1 | 50 |  |  |  | 7 | if ($self->{'_AOL_no_results_found'}) | 
| 123 |  |  |  |  |  |  | { | 
| 124 | 0 |  |  |  |  | 0 | return 0; | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 1 | 50 |  |  |  | 7 | if ($self->{'_AOL_first_retrieve_call'}) | 
| 128 |  |  |  |  |  |  | { | 
| 129 | 1 |  |  |  |  | 3 | $self->{'_AOL_first_retrieve_call'} = undef; | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 1 |  |  |  |  | 9 | my $nohit_div = $tree->look_down("_tag", "div", "class", "NH"); | 
| 132 |  |  |  |  |  |  |  | 
| 133 | 1 | 50 |  |  |  | 2051 | if (defined($nohit_div)) | 
| 134 |  |  |  |  |  |  | { | 
| 135 | 1 | 50 | 33 |  |  | 7 | if (($nohit_div->as_text() =~ /Your search for/) && | 
| 136 |  |  |  |  |  |  | ($nohit_div->as_text() =~ /returned no results\./) | 
| 137 |  |  |  |  |  |  | ) | 
| 138 |  |  |  |  |  |  | { | 
| 139 | 1 |  |  |  |  | 454 | return $self->_no_hits(); | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 0 |  |  |  |  |  | my $wr_div = $tree->look_down("_tag", "div", "class", "BB"); | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 0 | 0 |  |  |  |  | if (!defined($wr_div)) | 
| 146 |  |  |  |  |  |  | { | 
| 147 | 0 |  |  |  |  |  | return $self->_no_hits(); | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | # A word separator that includes whitespace and   (\x{a0}. | 
| 151 | 0 |  |  |  |  |  | my $word_sep = qr/[\s\x{a0}]+/; | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 0 | 0 |  |  |  |  | if (my ($n) = | 
| 154 |  |  |  |  |  |  | ( | 
| 155 |  |  |  |  |  |  | $wr_div->as_text() =~ | 
| 156 |  |  |  |  |  |  | m/of${word_sep}about${word_sep}([\d,]+)/ | 
| 157 |  |  |  |  |  |  | ) | 
| 158 |  |  |  |  |  |  | ) | 
| 159 |  |  |  |  |  |  | { | 
| 160 | 0 |  |  |  |  |  | $n =~ tr/,//d; | 
| 161 | 0 |  |  |  |  |  | $self->approximate_result_count($n); | 
| 162 |  |  |  |  |  |  | } | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | =begin Removed | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | my @h1_divs = $tree->look_down("_tag", "div", "class", "h1"); | 
| 168 |  |  |  |  |  |  | my $requested_div; | 
| 169 |  |  |  |  |  |  | foreach my $div (@h1_divs) | 
| 170 |  |  |  |  |  |  | { | 
| 171 |  |  |  |  |  |  | my $h1 = $div->look_down("_tag", "h1"); | 
| 172 |  |  |  |  |  |  | if ($h1->as_text() eq "web results") | 
| 173 |  |  |  |  |  |  | { | 
| 174 |  |  |  |  |  |  | $requested_div = $div; | 
| 175 |  |  |  |  |  |  | last; | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  | if (!defined($requested_div)) | 
| 179 |  |  |  |  |  |  | { | 
| 180 |  |  |  |  |  |  | die "Could not find div. Please report the error to the author of the module."; | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | my $r_head_div = $requested_div->parent(); | 
| 184 |  |  |  |  |  |  | my $r_web_div = $r_head_div->parent(); | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | =end Removed | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | =cut | 
| 189 |  |  |  |  |  |  |  | 
| 190 | 0 |  |  |  |  |  | my $r_web_div = $tree->look_down("_tag", "ul", "content", "MSL"); | 
| 191 | 0 |  |  |  |  |  | my @results_divs = $r_web_div->look_down("_tag", "li", "about", qr{^r\d+$}); | 
| 192 | 0 |  |  |  |  |  | my $hits_found = 0; | 
| 193 | 0 |  |  |  |  |  | foreach my $result (@results_divs) | 
| 194 |  |  |  |  |  |  | { | 
| 195 | 0 | 0 |  |  |  |  | if ($result->attr('about') !~ m/^r(\d+)$/) | 
| 196 |  |  |  |  |  |  | { | 
| 197 | 0 |  |  |  |  |  | die "Broken Parsing. Please contact the author to fix it."; | 
| 198 |  |  |  |  |  |  | } | 
| 199 | 0 |  |  |  |  |  | my $id_num = $1; | 
| 200 | 0 |  |  |  |  |  | my $desc_tag = $result->look_down("_tag", "p", "property", "f:desc"); | 
| 201 | 0 |  |  |  |  |  | my $a_tag = $result->look_down("_tag", "a", "class", "find"); | 
| 202 | 0 |  |  |  |  |  | my $hit = WWW::SearchResult->new(); | 
| 203 | 0 |  |  |  |  |  | $hit->add_url($a_tag->attr("href")); | 
| 204 | 0 |  |  |  |  |  | $hit->description($desc_tag->as_text()); | 
| 205 | 0 |  |  |  |  |  | $hit->title($a_tag->as_text()); | 
| 206 | 0 |  |  |  |  |  | push @{$self->{'cache'}}, $hit; | 
|  | 0 |  |  |  |  |  |  | 
| 207 | 0 |  |  |  |  |  | $hits_found++; | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | # Get the next URL | 
| 211 |  |  |  |  |  |  | { | 
| 212 | 0 |  |  |  |  |  | my $span_next_page = $tree->look_down("_tag", "span", "class", "gspPageNext"); | 
|  | 0 |  |  |  |  |  |  | 
| 213 | 0 |  |  |  |  |  | my @a_tags = $span_next_page->look_down("_tag", "a"); | 
| 214 |  |  |  |  |  |  | # The reverse() is there because it seems the "next" link is at | 
| 215 |  |  |  |  |  |  | # the end. | 
| 216 | 0 |  |  |  |  |  | foreach my $a_tag (reverse(@a_tags)) | 
| 217 |  |  |  |  |  |  | { | 
| 218 | 0 | 0 |  |  |  |  | if ($a_tag->as_text() =~ "Next") | 
| 219 |  |  |  |  |  |  | { | 
| 220 |  |  |  |  |  |  | $self->{'_next_url'} = | 
| 221 |  |  |  |  |  |  | $self->absurl( | 
| 222 | 0 |  |  |  |  |  | $self->{'_prev_url'}, | 
| 223 |  |  |  |  |  |  | $a_tag->attr('href') | 
| 224 |  |  |  |  |  |  | ); | 
| 225 | 0 |  |  |  |  |  | last; | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  | } | 
| 228 |  |  |  |  |  |  | } | 
| 229 | 0 |  |  |  |  |  | return $hits_found; | 
| 230 |  |  |  |  |  |  | } | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | =begin Removed | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | =head2 preprocess_results_page() | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | The purpose of this function was to decode the HTML text as returned by | 
| 238 |  |  |  |  |  |  | search.aol.com as UTF-8. But it seems recent versions of WWW::Search already | 
| 239 |  |  |  |  |  |  | have a similar mechanism. | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | sub preprocess_results_page | 
| 242 |  |  |  |  |  |  | { | 
| 243 |  |  |  |  |  |  | my $self = shift; | 
| 244 |  |  |  |  |  |  | my $contents = shift; | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | return decode('UTF-8', $contents); | 
| 247 |  |  |  |  |  |  | } | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | =end Removed | 
| 250 |  |  |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | =cut | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | =head1 AUTHOR | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | Shlomi Fish, L . | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | Funded by L and | 
| 258 |  |  |  |  |  |  | L. | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | =head1 BUGS | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | Please report any bugs or feature requests to | 
| 263 |  |  |  |  |  |  | C, or through the web interface at | 
| 264 |  |  |  |  |  |  | L. | 
| 265 |  |  |  |  |  |  | I will be notified, and then you'll automatically be notified of progress on | 
| 266 |  |  |  |  |  |  | your bug as I make changes. | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | =head1 ACKNOWLEDGEMENTS | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | Funded by L and | 
| 271 |  |  |  |  |  |  | L. | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | =head1 DEVELOPMENT | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | Source code is version-controlled in a Subversion repository in Berlios: | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | L | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | One can find the most up-to-date version there. | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | =head1 COPYRIGHT & LICENSE | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | Copyright 2006 Shlomi Fish, all rights reserved. | 
| 284 |  |  |  |  |  |  |  | 
| 285 |  |  |  |  |  |  | This program is released under the following license: MIT X11 (a BSD-style | 
| 286 |  |  |  |  |  |  | license). | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | =cut | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | 1; # End of WWW::Search::AOL |