| blib/lib/WWW/Sucksub/Divxstation.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % | 
| statement | 25 | 27 | 92.5 | 
| branch | n/a | ||
| condition | n/a | ||
| subroutine | 9 | 9 | 100.0 | 
| pod | n/a | ||
| total | 34 | 36 | 94.4 | 
| line | stmt | bran | cond | sub | pod | time | code | 
|---|---|---|---|---|---|---|---|
| 1 | package WWW::Sucksub::Divxstation; | ||||||
| 2 | |||||||
| 3 | |||||||
| 4 | =head1 NAME | ||||||
| 5 | |||||||
| 6 | WWW::Sucksub::Divxstation - automated access to divxstation.com | ||||||
| 7 | |||||||
| 8 | =head1 VERSION | ||||||
| 9 | |||||||
| 10 | Version 0.04 | ||||||
| 11 | |||||||
| 12 | =cut | ||||||
| 13 | |||||||
| 14 | our $VERSION = '0.04'; | ||||||
| 15 | |||||||
| 16 | =head1 SYNOPSIS | ||||||
| 17 | |||||||
| 18 | SuckSub::Divxstation is a wab robot based on the WWW::Mechanize Module | ||||||
| 19 | This module search and collect distant result on the divxstation.com base | ||||||
| 20 | Subtitles Files are very little files, Sucksub::Divstation store all results | ||||||
| 21 | of any search in a dbm file. You can retrieve it through an html file. | ||||||
| 22 | |||||||
| 23 | |||||||
| 24 | |||||||
| 25 | use WWW::Sucksub::Divxstation; | ||||||
| 26 | my $foo = WWW::Sucksub::Divxstation->new( | ||||||
| 27 | dbfile=> '/where/your/DBM/file is.db', | ||||||
| 28 | html =>'/where/your/html/repport/is.html', | ||||||
| 29 | motif=> 'the word(s) you search', | ||||||
| 30 | debug=> 1, | ||||||
| 31 | logout => '/where/your/debug/info/are/written.log', ); | ||||||
| 32 | $foo->update(); # collect all link corresponding to the $foo->motif() | ||||||
| 33 | $foo->motif('x'); # modify the search criteria | ||||||
| 34 | $foo->search(); # launch a search on the local database | ||||||
| 35 | |||||||
| 36 | |||||||
| 37 | |||||||
| 38 | =head1 CONSTRUCTOR AND STARTUP | ||||||
| 39 | |||||||
| 40 | =head2 Divxstation Constructor | ||||||
| 41 | |||||||
| 42 | The new() constructor, is associated to default values : | ||||||
| 43 | you can modify these one as shown in the synopsis example. | ||||||
| 44 | |||||||
| 45 | my $foo = WWW::Sucksub::Divxstation->new( | ||||||
| 46 | html=> "$ENV{HOME}"."/sksb_divxstation_report.html", | ||||||
| 47 | dbfile=> "$ENV{HOME}"."/sksb_divxstation_db.db", | ||||||
| 48 | motif=> undef, | ||||||
| 49 | debug=> 0, | ||||||
| 50 | logout => undef, # i.e. *STDOUT | ||||||
| 51 | useragent=> "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007" | ||||||
| 52 | ); | ||||||
| 53 | |||||||
| 54 | =head3 new() constructor attributes and associated methods | ||||||
| 55 | |||||||
| 56 | Few attributes can be set thru new() attributes. | ||||||
| 57 | All attributes can be modified by corresponding methods: | ||||||
| 58 | |||||||
| 59 | $foo->WWW::Sucksub::Divxstation->new() | ||||||
| 60 | $foo->useragent() # get the useragent attribute value | ||||||
| 61 | $foo->useragent('tructruc') # set the useragent attribute value to 'tructruc' | ||||||
| 62 | |||||||
| 63 | |||||||
| 64 | =head4 cookies_files() | ||||||
| 65 | |||||||
| 66 | arg must be a file, this default value can be modified by calling the | ||||||
| 67 | |||||||
| 68 | $foo->cookies_file('/where/my/cookies/are.txt') | ||||||
| 69 | |||||||
| 70 | modify the default value positionned by the new constructor. | ||||||
| 71 | |||||||
| 72 | $foo->cookies_file() | ||||||
| 73 | |||||||
| 74 | return the actual value of the cookies file path. | ||||||
| 75 | |||||||
| 76 | =head4 useragent() | ||||||
| 77 | |||||||
| 78 | arg should be a valid useragent. There's no reason to change this default value. | ||||||
| 79 | |||||||
| 80 | $foo->useragent() | ||||||
| 81 | |||||||
| 82 | return the value of the current useragent. | ||||||
| 83 | |||||||
| 84 | =head4 motif() | ||||||
| 85 | |||||||
| 86 | you should here give a real value to this function : | ||||||
| 87 | if $foo->motif is undef, the package execution will be aborted | ||||||
| 88 | |||||||
| 89 | $foo->motif('xxx') | ||||||
| 90 | |||||||
| 91 | allows to precise that you're searching a word that contains 'xxx' | ||||||
| 92 | |||||||
| 93 | $foo->motif() | ||||||
| 94 | |||||||
| 95 | return the current value of the string you search. | ||||||
| 96 | |||||||
| 97 | =head4 debug() | ||||||
| 98 | |||||||
| 99 | WWW-Sucksub-Divxstation can produce a lot of interresting informations | ||||||
| 100 | The default value is "0" : that means that any debug informations will be written | ||||||
| 101 | on the output ( see the logout() method too.) | ||||||
| 102 | |||||||
| 103 | $foo->debug(0) # stop the product of debbugging informations | ||||||
| 104 | $foo->debug(1) # debug info will be written to the log file ( see logout() method) | ||||||
| 105 | |||||||
| 106 | =head4 logout() | ||||||
| 107 | |||||||
| 108 | if you want some debug information : args is 1, else 0 or undef | ||||||
| 109 | |||||||
| 110 | logout => undef; | ||||||
| 111 | |||||||
| 112 | output and optional debugging info will be produced ont STDOUT | ||||||
| 113 | or any other descriptor if you give filename as arg. | ||||||
| 114 | |||||||
| 115 | =head4 dbfile() | ||||||
| 116 | |||||||
| 117 | define dbm file for store and retrieving extracted informations | ||||||
| 118 | you must provide a full path to the db file to store results. | ||||||
| 119 | the search() method can not be used without defined dbm file. | ||||||
| 120 | |||||||
| 121 | dbfile('/where/your/db/is.db') | ||||||
| 122 | |||||||
| 123 | The file will should be readable/writable. | ||||||
| 124 | |||||||
| 125 | =head4 html() | ||||||
| 126 | |||||||
| 127 | Define simple html output where to write search report. | ||||||
| 128 | you must provide au full path to the html file if you want to get an html output. | ||||||
| 129 | |||||||
| 130 | html('/where/the/html/repport/is/written.html') | ||||||
| 131 | |||||||
| 132 | If $foo->html() is defined. you can get the value of this attribute like this : | ||||||
| 133 | |||||||
| 134 | my $html_page = $foo->html | ||||||
| 135 | |||||||
| 136 | html file will be used for repport with update() and search() methods. | ||||||
| 137 | |||||||
| 138 | |||||||
| 139 | =head1 METHODS and FUNCTIONS | ||||||
| 140 | |||||||
| 141 | these functions use the precedent attributes value. | ||||||
| 142 | |||||||
| 143 | =head2 search() | ||||||
| 144 | |||||||
| 145 | this function takes no arguments. | ||||||
| 146 | it alows to launch a local dbm search. | ||||||
| 147 | |||||||
| 148 | $foo-> search() | ||||||
| 149 | |||||||
| 150 | the dbm file is read to give you every couple (title,link) which corresponds to | ||||||
| 151 | the motif() pattern. | ||||||
| 152 | |||||||
| 153 | =head2 update() | ||||||
| 154 | |||||||
| 155 | this function takes no arguments. | ||||||
| 156 | it alows to initiate the distant search on the web site divxstation.com | ||||||
| 157 | the local dbm file is automatically written. Results are accumulated to the dbm file | ||||||
| 158 | you define. | ||||||
| 159 | |||||||
| 160 | =head2 get_all_result() | ||||||
| 161 | |||||||
| 162 | return a hash of every couple ( title, http link of subtitle file ) the search or update method returned. | ||||||
| 163 | |||||||
| 164 | my %hash=$foo->get_all_result() | ||||||
| 165 | |||||||
| 166 | |||||||
| 167 | =head1 SEE ALSO | ||||||
| 168 | |||||||
| 169 | =over 4 | ||||||
| 170 | |||||||
| 171 | =item * L | ||||||
| 172 | |||||||
| 173 | =item * L | ||||||
| 174 | |||||||
| 175 | =item * L | ||||||
| 176 | |||||||
| 177 | =item * L | ||||||
| 178 | |||||||
| 179 | =item * L | ||||||
| 180 | |||||||
| 181 | =item * L | ||||||
| 182 | |||||||
| 183 | =back | ||||||
| 184 | |||||||
| 185 | =head1 AUTHOR | ||||||
| 186 | |||||||
| 187 | Timothée foucart, C<< | ||||||
| 188 | |||||||
| 189 | =head1 BUGS | ||||||
| 190 | |||||||
| 191 | Please report any bugs or feature requests to | ||||||
| 192 | C | ||||||
| 193 | L | ||||||
| 194 | I will be notified, and then you'll automatically be notified of progress on | ||||||
| 195 | your bug as I make changes. | ||||||
| 196 | |||||||
| 197 | =head1 ACKNOWLEDGEMENTS | ||||||
| 198 | |||||||
| 199 | =head1 COPYRIGHT & LICENSE | ||||||
| 200 | |||||||
| 201 | Copyright 2005 Timothée foucart, all rights reserved. | ||||||
| 202 | |||||||
| 203 | This program is free software; you can redistribute it and/or modify it | ||||||
| 204 | under the same terms as Perl itself. | ||||||
| 205 | |||||||
| 206 | =cut | ||||||
| 207 | |||||||
| 208 | 1 | 1 | 29051 | use warnings; | |||
| 1 | 3 | ||||||
| 1 | 31 | ||||||
| 209 | 1 | 1 | 5 | use strict; | |||
| 1 | 2 | ||||||
| 1 | 43 | ||||||
| 210 | require Exporter; | ||||||
| 211 | 1 | 1 | 5 | use vars qw(@ISA @EXPORT $VERSION); | |||
| 1 | 6 | ||||||
| 1 | 87 | ||||||
| 212 | @ISA = qw(Exporter); | ||||||
| 213 | @EXPORT=qw( cookies_file debug dbfile | ||||||
| 214 | get_all_result html logout | ||||||
| 215 | motif search update useragent ); | ||||||
| 216 | |||||||
| 217 | 1 | 1 | 1111 | use utf8; | |||
| 1 | 11 | ||||||
| 1 | 6 | ||||||
| 218 | 1 | 1 | 37 | use strict; | |||
| 1 | 2 | ||||||
| 1 | 31 | ||||||
| 219 | 1 | 1 | 5 | use Carp; | |||
| 1 | 2 | ||||||
| 1 | 77 | ||||||
| 220 | 1 | 1 | 988 | use HTTP::Cookies; | |||
| 1 | 28787 | ||||||
| 1 | 103 | ||||||
| 221 | 1 | 1 | 1359 | use WWW::Mechanize; | |||
| 1 | 203222 | ||||||
| 1 | 56 | ||||||
| 222 | # | ||||||
| 223 | # | ||||||
| 224 | # -- | ||||||
| 225 | # | ||||||
| 226 | 1 | 1 | 520 | use Alias qw(attr); | |||
| 0 | |||||||
| 0 | |||||||
| 227 | use vars qw( $base $site $cookies_file $useragent $motif $debug $logout $html $dbfile $okdbfile $nbres $totalres %sstsav $fh); | ||||||
| 228 | |||||||
| 229 | sub new{ | ||||||
| 230 | my $divxstation=shift; | ||||||
| 231 | my $classe= ref($divxstation) || $divxstation; | ||||||
| 232 | my $self={ }; | ||||||
| 233 | bless($self,$classe); | ||||||
| 234 | $self->_init(@_); | ||||||
| 235 | logout($self->{logout}); | ||||||
| 236 | return $self; | ||||||
| 237 | }; | ||||||
| 238 | sub _init{ | ||||||
| 239 | my $self= attr shift; | ||||||
| 240 | # | ||||||
| 241 | # -- init default values | ||||||
| 242 | # | ||||||
| 243 | $self->{base} = "http://divxstation.com"; | ||||||
| 244 | $self->{site} = "http://divxstation.com/subtitles.asp"; | ||||||
| 245 | $self->{cookies_file} = "$ENV{HOME}"."/.cookies_sksb"; | ||||||
| 246 | $self->{useragent} = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"; | ||||||
| 247 | $self->{motif} = undef; | ||||||
| 248 | $self->{debug} = 0; | ||||||
| 249 | $self->{logout} = \*STDOUT; | ||||||
| 250 | $self->{html} = "$ENV{HOME}"."/sksb_divxstation_report.html"; | ||||||
| 251 | $self->{dbfile} = "$ENV{HOME}"."/sksb_divxstation_db.db"; | ||||||
| 252 | $self->{okdbfile} = 0; | ||||||
| 253 | $self->{sstsav} ={}; | ||||||
| 254 | # | ||||||
| 255 | # -- replace forced values | ||||||
| 256 | # | ||||||
| 257 | if (@_) | ||||||
| 258 | { | ||||||
| 259 | my %param=@_; | ||||||
| 260 | while (my($x,$y) =each(%param)){$self->{$x}=$y;}; | ||||||
| 261 | } | ||||||
| 262 | return $self; | ||||||
| 263 | }; | ||||||
| 264 | |||||||
| 265 | sub useragent { | ||||||
| 266 | my $self =attr shift; | ||||||
| 267 | if (@_) {$useragent=shift;} | ||||||
| 268 | return $useragent; | ||||||
| 269 | } | ||||||
| 270 | |||||||
| 271 | sub dbfile { | ||||||
| 272 | my $self =attr shift; | ||||||
| 273 | if (@_) {$dbfile=shift;$okdbfile=1}; | ||||||
| 274 | if ($okdbfile==0) {return undef;}; | ||||||
| 275 | return $dbfile; | ||||||
| 276 | } | ||||||
| 277 | sub debug { | ||||||
| 278 | my $self =attr shift; | ||||||
| 279 | if (@_) {$debug=shift;} | ||||||
| 280 | return $debug; | ||||||
| 281 | } | ||||||
| 282 | sub sstsav { | ||||||
| 283 | my $self =attr shift; | ||||||
| 284 | if (@_) {%sstsav=shift;} | ||||||
| 285 | return %sstsav; | ||||||
| 286 | } | ||||||
| 287 | sub get_all_result { | ||||||
| 288 | my $self =attr shift; | ||||||
| 289 | %sstsav=$self->sstsav(); | ||||||
| 290 | return %sstsav; | ||||||
| 291 | } | ||||||
| 292 | sub cookies_file { | ||||||
| 293 | my $self =attr shift; | ||||||
| 294 | if (@_) {$cookies_file=shift;} | ||||||
| 295 | return $cookies_file; | ||||||
| 296 | } | ||||||
| 297 | sub motif { | ||||||
| 298 | my $self = attr shift; | ||||||
| 299 | if (@_) {$motif=shift;return $motif ;} | ||||||
| 300 | else {return $motif ;}; | ||||||
| 301 | } | ||||||
| 302 | sub logout { | ||||||
| 303 | if (@_){$logout=shift; } | ||||||
| 304 | if ($logout) | ||||||
| 305 | { open(FH , ">>", $logout) or croak " can not open $logout : $!\n"; | ||||||
| 306 | $fh=(\*FH);} | ||||||
| 307 | else | ||||||
| 308 | { $fh=(\*STDOUT);}; | ||||||
| 309 | return $logout; | ||||||
| 310 | } | ||||||
| 311 | sub html { | ||||||
| 312 | my $self =attr shift; | ||||||
| 313 | if (@_) {$html=shift;} | ||||||
| 314 | else {$html=$self;}; | ||||||
| 315 | unless (-e ($html)) | ||||||
| 316 | {print $fh "[DEBUG] html report file doesn't exists \n";} | ||||||
| 317 | return $html; | ||||||
| 318 | } | ||||||
| 319 | sub open_html{ | ||||||
| 320 | open(HTMLFILE,">>",$html) | ||||||
| 321 | or croak "can not create $html : $! \n"; | ||||||
| 322 | print HTMLFILE " report generated by suckSub perl module \n"; | ||||||
| 323 | print HTMLFILE "searching : ".motif()." on ".$site." \n"; | ||||||
| 324 | print HTMLFILE " ".localtime()." \n"; | ||||||
| 325 | return; | ||||||
| 326 | } | ||||||
| 327 | sub update { | ||||||
| 328 | my $self =attr shift; | ||||||
| 329 | unless ($motif){print "no motif : please give he words you search....exit\n";return;}; | ||||||
| 330 | my $mech = WWW::Mechanize->new(agent=>$useragent, | ||||||
| 331 | cookie_jar => HTTP::Cookies->new( | ||||||
| 332 | file => $cookies_file, | ||||||
| 333 | autosave => 1, | ||||||
| 334 | ignore_discard => 0, | ||||||
| 335 | ), | ||||||
| 336 | stack_depth => 1, | ||||||
| 337 | ); | ||||||
| 338 | if ($html){open_html();}; | ||||||
| 339 | print $fh "------------------------------------------------------------------------------------------------\n" if ($debug); | ||||||
| 340 | print $fh "[DEBUG] begin scan on $site at : ".localtime()."\n" if ($debug); | ||||||
| 341 | print $fh "[DEBUG] searching : ".$motif." on $site \n" if ($debug); | ||||||
| 342 | my $page = 1; # pagination | ||||||
| 343 | if ($debug) {print $fh "\n[DEBUG \t DIVXSTATION PAGE $page]\n";}; | ||||||
| 344 | $mech->get($site.'?le='.$motif.'&l=18&f=&page=&Submit=search+subtitles') or warn "[WARNING] http get problem on : $site !! \n"; | ||||||
| 345 | $mech->form_name('theform'); | ||||||
| 346 | $mech->set_fields( le => $motif ); | ||||||
| 347 | $mech->set_fields( l => 18 ); #i.e. langage = french | ||||||
| 348 | $mech->click('Submit'); | ||||||
| 349 | if ($debug) { print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri()."]\n" if ($debug);}; | ||||||
| 350 | $nbres = parse_divxstation($mech); | ||||||
| 351 | $totalres=$nbres; | ||||||
| 352 | # | ||||||
| 353 | # verify if we need to change page to get next search results | ||||||
| 354 | # | ||||||
| 355 | while ($totalres eq (20*$page)) | ||||||
| 356 | { print $fh "[DEBUG][COUNT RESULTS] page num : ".$page." number result : ".$nbres."\n" if ($debug); | ||||||
| 357 | $page = $page+1; | ||||||
| 358 | $mech->get( "http://divxstation.com/searchSubtitles.asp?l=18&f=&le=".$motif."&page=".$page) | ||||||
| 359 | or warn "get problem sur page : $page : $! \n"; | ||||||
| 360 | if ($debug) { print $fh "[DEBUG \t PAGE : $page]\n";print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri() ."]\n";}; | ||||||
| 361 | $nbres = parse_divxstation($mech,$page); | ||||||
| 362 | $totalres=$totalres+$nbres; | ||||||
| 363 | }; | ||||||
| 364 | |||||||
| 365 | # | ||||||
| 366 | print $fh "[DEBUG \t : $totalres trouves sur $base]\n" if ($debug); | ||||||
| 367 | print $fh "[END]\n" if ($debug); | ||||||
| 368 | #print html report | ||||||
| 369 | if ($html) | ||||||
| 370 | { | ||||||
| 371 | $nbres=0; | ||||||
| 372 | while (my ($k,$v) =each(%sstsav)) | ||||||
| 373 | { | ||||||
| 374 | print HTMLFILE "".$v." \n"; | ||||||
| 375 | $nbres++; | ||||||
| 376 | } | ||||||
| 377 | |||||||
| 378 | } | ||||||
| 379 | #finish and close all open file(s) | ||||||
| 380 | if ($html) | ||||||
| 381 | { | ||||||
| 382 | print HTMLFILE " ".$nbres." result(s) found \n"; | ||||||
| 383 | print HTMLFILE " report finished at ".localtime()." \n"; | ||||||
| 384 | } | ||||||
| 385 | |||||||
| 386 | |||||||
| 387 | close HTMLFILE; | ||||||
| 388 | return; | ||||||
| 389 | }; | ||||||
| 390 | |||||||
| 391 | # | ||||||
| 392 | # ---local search if $dbfile exist | ||||||
| 393 | # | ||||||
| 394 | sub search { | ||||||
| 395 | my $self =attr shift; | ||||||
| 396 | unless ($dbfile) { print $fh " no DB file defined : exit ... \n";}; | ||||||
| 397 | #html report | ||||||
| 398 | if ($html) | ||||||
| 399 | { | ||||||
| 400 | open(HTMLFILE,">>",$html) | ||||||
| 401 | or croak "can not create $html : $! \n"; | ||||||
| 402 | print HTMLFILE " local search on dm file : $dbfile \n"; | ||||||
| 403 | print HTMLFILE "searching : ".$motif." on ".$site." \n"; | ||||||
| 404 | print HTMLFILE " ".localtime()." \n"; | ||||||
| 405 | }; | ||||||
| 406 | #print html report | ||||||
| 407 | #local search --> print and finish html report | ||||||
| 408 | search_dbm($dbfile); | ||||||
| 409 | return; | ||||||
| 410 | |||||||
| 411 | }; | ||||||
| 412 | # | ||||||
| 413 | #--- this function = to parse only one result page | ||||||
| 414 | # | ||||||
| 415 | |||||||
| 416 | sub parse_divxstation{ | ||||||
| 417 | my $mech=$_[0];my $page =$_[1]; | ||||||
| 418 | my $jnd=0; my $jnd2=0; | ||||||
| 419 | my $lnk=$mech->find_all_links(); | ||||||
| 420 | my $nbl = $#{$lnk}; my $ind=0; | ||||||
| 421 | print $fh "[DEBUG] searching links on : ".$mech->uri()." ]\n" if ($debug); | ||||||
| 422 | |||||||
| 423 | # =4= rechercher les liens des reponses de la recherche | ||||||
| 424 | my @sstlist=[];my @ssturl=[];# memo array | ||||||
| 425 | for ( my $ind=0; $ind <= $#{$lnk} ; $ind++) | ||||||
| 426 | { | ||||||
| 427 | # search and memorize the subtitle label | ||||||
| 428 | if ( ($lnk->[$ind]->url() =~ m/(subtitle)(\.asp)(\?sId=)([0-9]+$)/g ) | ||||||
| 429 | and ( $lnk->[$ind]->url() !~ m/userinfo/ ) ) | ||||||
| 430 | { | ||||||
| 431 | push @sstlist,$lnk->[$ind]->text(); | ||||||
| 432 | push @ssturl,$lnk->[$ind]->url_abs(); | ||||||
| 433 | print $fh "[FOUND]". $lnk->[$ind]->text() ."\n\t". $lnk->[$ind]->url_abs()."\n" if $debug; | ||||||
| 434 | $jnd++ ; | ||||||
| 435 | }; | ||||||
| 436 | }; | ||||||
| 437 | # verify we get any result for the search request | ||||||
| 438 | if ( $jnd < 1) { print $fh " PAS DE RESULTAT pour $motif sur divxstation\n";return 0;}; | ||||||
| 439 | print $fh "[DEBUG] nombre de lien premier niveau ". $jnd ."\n" if ($debug); | ||||||
| 440 | # from the main result page, we need to follow link to found the sub http adress | ||||||
| 441 | # to get the uri of the subtitle file | ||||||
| 442 | for ( my $n=0; $n <= $jnd ; $n++) | ||||||
| 443 | { | ||||||
| 444 | my $result2 = $mech->get( $ssturl[$n] ); | ||||||
| 445 | print $fh "[DEBUG] GET ". $mech->uri()."\n" if ($debug); | ||||||
| 446 | my $lnk2=$mech->find_all_links(); | ||||||
| 447 | print $fh "[DEBUG] link number : ". $n."\n" if ($debug); | ||||||
| 448 | # | ||||||
| 449 | for ( my $ind2=0; $ind2 <= $#{$lnk2} ; $ind2++) | ||||||
| 450 | { | ||||||
| 451 | if ( $lnk2->[$ind2]->text() =~ m/Download subtitle/ ) | ||||||
| 452 | { | ||||||
| 453 | print $fh "[FOUND LINK] link : ". $lnk2->[$ind2]->url_abs() ."\n" if ($debug); | ||||||
| 454 | $sstsav{$lnk2->[$ind2]->url_abs()}=$sstlist[$n]; | ||||||
| 455 | }; | ||||||
| 456 | $jnd2++ ; # next sub in every cases | ||||||
| 457 | }; | ||||||
| 458 | }; # end loop | ||||||
| 459 | $nbres=$jnd; | ||||||
| 460 | save_dbm(); | ||||||
| 461 | return $nbres; | ||||||
| 462 | }; | ||||||
| 463 | sub save_dbm{ | ||||||
| 464 | my %xstsav; | ||||||
| 465 | use DB_File; | ||||||
| 466 | tie (%xstsav,'DB_File',$dbfile ) | ||||||
| 467 | or croak "can not use $dbfile : $!\n"; | ||||||
| 468 | while (my ($k, $v) = each %sstsav) | ||||||
| 469 | { $xstsav{$k}=$v; print $fh "[DEBUG][DBM] saving $v [$k] into db \n" if ($debug);}; | ||||||
| 470 | untie(%xstsav); | ||||||
| 471 | return; | ||||||
| 472 | }; | ||||||
| 473 | sub search_dbm{ | ||||||
| 474 | use DB_File; | ||||||
| 475 | my %hashread; | ||||||
| 476 | my $nb_local_res; | ||||||
| 477 | unless (-e ($dbfile)) | ||||||
| 478 | {croak "[DEBUG SEARCH] db file ".$dbfile." not found ! \n";}; | ||||||
| 479 | tie(%hashread,'DB_File',$dbfile) | ||||||
| 480 | or croak "can not access : $dbfile : $!\n"; | ||||||
| 481 | if ($html) | ||||||
| 482 | { print HTMLFILE " Searching : ".$motif." on local database : \n"; | ||||||
| 483 | print HTMLFILE " DBM file is :".$dbfile." \n"; | ||||||
| 484 | } | ||||||
| 485 | while (my ($k,$v)=each(%hashread)) | ||||||
| 486 | { | ||||||
| 487 | if ($v =~ m/$motif/i) | ||||||
| 488 | { | ||||||
| 489 | print $fh "[FOUND Libelle ] $v \n[FOUND LINK]". $base.$k ."\n"; | ||||||
| 490 | if ($html) | ||||||
| 491 | { | ||||||
| 492 | my $url=$k; | ||||||
| 493 | if ($k !~ m/http:\/\//im){my $url=$base.$k} | ||||||
| 494 | print HTMLFILE "".$v." \n"; | ||||||
| 495 | $nb_local_res++; | ||||||
| 496 | }; | ||||||
| 497 | }; | ||||||
| 498 | }; | ||||||
| 499 | untie(%hashread); | ||||||
| 500 | if ($html) | ||||||
| 501 | { | ||||||
| 502 | print HTMLFILE " [ ".$nb_local_res." result(s) found on local DB ] \n"; | ||||||
| 503 | print HTMLFILE " report finished at ".localtime()." \n"; | ||||||
| 504 | } | ||||||
| 505 | return; | ||||||
| 506 | }; | ||||||
| 507 | sub END{ | ||||||
| 508 | close FH; | ||||||
| 509 | close HTMLFILE; | ||||||
| 510 | }; | ||||||
| 511 | |||||||
| 512 | 1; # End of WWW-Sucksub::Divxstation |