blib/lib/WWW/Sucksub/Extratitles.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 22 | 24 | 91.6 |
branch | n/a | ||
condition | n/a | ||
subroutine | 8 | 8 | 100.0 |
pod | n/a | ||
total | 30 | 32 | 93.7 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package WWW::Sucksub::Extratitles; | ||||||
2 | |||||||
3 | |||||||
4 | =head1 NAME | ||||||
5 | |||||||
6 | WWW::Sucksub::Extratitles - automated access to Extratitles.com | ||||||
7 | |||||||
8 | =head1 VERSION | ||||||
9 | |||||||
10 | Version 0.01 | ||||||
11 | |||||||
12 | =cut | ||||||
13 | |||||||
14 | our $VERSION = '0.01'; | ||||||
15 | |||||||
16 | =head1 SYNOPSIS | ||||||
17 | |||||||
18 | SuckSub::Extratitles is a web automat based on the WWW::Mechanize Module | ||||||
19 | This module search and collect distant result on the Extratitles.com database. | ||||||
20 | Subtitles Files are very little files, Sucksub::Divstation store all results | ||||||
21 | in a dbm file that you can exploit to retrieve any subtitles information. | ||||||
22 | |||||||
23 | |||||||
24 | |||||||
25 | use WWW::Sucksub::Extratitles; | ||||||
26 | my $foo = WWW::Sucksub::Extratitles->new( | ||||||
27 | dbfile=> '/where/your/DBM/file is.db', | ||||||
28 | html =>'/where/your/html/repport/is.html', | ||||||
29 | motif=> 'the word(s) you search', | ||||||
30 | debug=> 1, | ||||||
31 | language=>'English' | ||||||
32 | logout => '/where/your/debug/info/are/written.log', ); | ||||||
33 | $foo->update(); # collect all link corresponding to the $foo->motif() | ||||||
34 | $foo->motif('x'); # modify the search criteria | ||||||
35 | $foo->search(); # launch a search on the local database | ||||||
36 | |||||||
37 | |||||||
38 | |||||||
39 | =head1 CONSTRUCTOR AND STARTUP | ||||||
40 | |||||||
41 | =head2 Extratitles Constructor | ||||||
42 | |||||||
43 | The new() constructor, is associated to default values : | ||||||
44 | you can modify these one as shown in the synopsis example. | ||||||
45 | |||||||
46 | my $foo = WWW::Sucksub::Extratitles->new( | ||||||
47 | html=> "$ENV{HOME}"."/sksb_Extratitles_report.html", | ||||||
48 | dbfile=> "$ENV{HOME}"."/sksb_Extratitles_db.db", | ||||||
49 | motif=> undef, | ||||||
50 | debug=> 0, | ||||||
51 | language => 'English' | ||||||
52 | logout => undef, # i.e. *STDOUT | ||||||
53 | useragent=> "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007" | ||||||
54 | ); | ||||||
55 | |||||||
56 | =head3 new() constructor attributes and associated methods | ||||||
57 | |||||||
58 | Few attributes can be set thru new() contructor's attributes. | ||||||
59 | All attributes can be modified by corresponding methods: | ||||||
60 | |||||||
61 | $foo->WWW::Sucksub::Extratitles->new() | ||||||
62 | $foo->useragent() # get the useragent attribute value | ||||||
63 | $foo->useragent('tructruc') # set the useragent attribute value to 'tructruc' | ||||||
64 | |||||||
65 | |||||||
66 | =head4 cookies_file() | ||||||
67 | |||||||
68 | arg must be a file, this default value can be modified by calling the | ||||||
69 | |||||||
70 | $foo->cookies_file('/where/my/cookies/are.txt') | ||||||
71 | |||||||
72 | modify the default value positionned by the new constructor. | ||||||
73 | |||||||
74 | $foo->cookies_file() | ||||||
75 | |||||||
76 | return the actual value of the cookies file path. | ||||||
77 | |||||||
78 | =head4 useragent() | ||||||
79 | |||||||
80 | arg should be a valid useragent. There's no reason to change this default value. | ||||||
81 | |||||||
82 | $foo->useragent() | ||||||
83 | |||||||
84 | return the value of the current useragent. | ||||||
85 | |||||||
86 | =head4 motif() | ||||||
87 | |||||||
88 | you should here give a real value to this function : | ||||||
89 | if $foo->motif is undef, the package execution will be aborted | ||||||
90 | |||||||
91 | $foo->motif('xxx') | ||||||
92 | |||||||
93 | allows to precise that you're searching a word that contains 'xxx' | ||||||
94 | |||||||
95 | $foo->motif() | ||||||
96 | |||||||
97 | return the current value of the string you search. | ||||||
98 | |||||||
99 | =head4 language() | ||||||
100 | |||||||
101 | Allows to set the langage for the subtitle search. | ||||||
102 | |||||||
103 | Default value is 0 : it means that all langages will be returned | ||||||
104 | |||||||
105 | $foo->langage('french') | ||||||
106 | |||||||
107 | allows to precise that you're searching a french subtitles | ||||||
108 | Common langages string values are : | ||||||
109 | |||||||
110 | Albanian | ||||||
111 | Argentino | ||||||
112 | Bosnian | ||||||
113 | Brazilian_portuguese | ||||||
114 | Bulgarian | ||||||
115 | Bulgarian_English | ||||||
116 | Chines GB code | ||||||
117 | Chinese | ||||||
118 | Croatian | ||||||
119 | Czech | ||||||
120 | Danish | ||||||
121 | Dutch/English | ||||||
122 | English | ||||||
123 | English - Hearing Impaired | ||||||
124 | English_German | ||||||
125 | Estonian | ||||||
126 | Finnish | ||||||
127 | French | ||||||
128 | German - Hearing Impaired | ||||||
129 | Germany | ||||||
130 | Greek | ||||||
131 | Hebrew | ||||||
132 | Hungarian/English | ||||||
133 | Hungary | ||||||
134 | Icelandic | ||||||
135 | Italy | ||||||
136 | Japanese | ||||||
137 | Kalle | ||||||
138 | Korean | ||||||
139 | |||||||
140 | |||||||
141 | =head4 debug() | ||||||
142 | |||||||
143 | WWW-Sucksub-Extratitles can produce a lot of interresting informations | ||||||
144 | The default value is "0" : that means that any debug informations will be written | ||||||
145 | on the output ( see the logout() method too.) | ||||||
146 | |||||||
147 | $foo->debug(0) # stop the product of debbugging informations | ||||||
148 | $foo->debug(1) # debug info will be written to the log file ( see logout() method) | ||||||
149 | |||||||
150 | =head4 logout() | ||||||
151 | |||||||
152 | if you want some debug information : args is 1, else 0 or undef | ||||||
153 | |||||||
154 | logout => undef; | ||||||
155 | |||||||
156 | output and optional debugging info will be produced on STDOUT | ||||||
157 | or any other descriptor if you give filename as arg. | ||||||
158 | |||||||
159 | =head4 dbfile() | ||||||
160 | |||||||
161 | define dbm file for store and retrieving extracted informations | ||||||
162 | you must provide a full path to the db file to store results. | ||||||
163 | the search() method can not be used without defined dbm file. | ||||||
164 | |||||||
165 | dbfile('/where/your/db/is.db') | ||||||
166 | |||||||
167 | The file will should be readable/writable. | ||||||
168 | |||||||
169 | =head4 html() | ||||||
170 | |||||||
171 | Define simple html output where to write search report. | ||||||
172 | you must provide au full path to the html file if you want to get an html output. | ||||||
173 | |||||||
174 | html('/where/the html/repport/is/written.html') | ||||||
175 | |||||||
176 | If $foo->html() is defined. you can get the value of this attribute like this : | ||||||
177 | |||||||
178 | my $html_page = $foo->html | ||||||
179 | |||||||
180 | html file will be used for repport with update() and search() methods. | ||||||
181 | The html page IS NOT a W3C conform html. It only allows to have a direct access to http links. | ||||||
182 | |||||||
183 | =head1 METHODS and FUNCTIONS | ||||||
184 | |||||||
185 | these functions use the precedent attributes value. | ||||||
186 | |||||||
187 | =head2 search() | ||||||
188 | |||||||
189 | this function takes no arguments. | ||||||
190 | it alows to launch a local dbm search. | ||||||
191 | |||||||
192 | $foo-> search() | ||||||
193 | |||||||
194 | the dbm file is read to give you every couple (title,link) which corresponds to | ||||||
195 | the motif() pattern. | ||||||
196 | |||||||
197 | =head2 update() | ||||||
198 | |||||||
199 | this function takes no arguments. | ||||||
200 | it allows to initiate the distant search on the web site Extratitles.com | ||||||
201 | the local dbm file is automatically written. Results are accumulated to the dbm file | ||||||
202 | you define with the . | ||||||
203 | |||||||
204 | =head2 get_all_result() | ||||||
205 | |||||||
206 | return a hash of every couple ( title, http link of subtitle file ) the search or update method returned. | ||||||
207 | |||||||
208 | my %hash=$foo->get_all_result() | ||||||
209 | |||||||
210 | |||||||
211 | =head1 SEE ALSO | ||||||
212 | |||||||
213 | =over 4 | ||||||
214 | |||||||
215 | =item * L |
||||||
216 | |||||||
217 | =item * L |
||||||
218 | |||||||
219 | =item * L |
||||||
220 | |||||||
221 | =item * L |
||||||
222 | |||||||
223 | =item * L |
||||||
224 | |||||||
225 | =item * L |
||||||
226 | |||||||
227 | =item * L |
||||||
228 | |||||||
229 | =back | ||||||
230 | |||||||
231 | =head1 AUTHOR | ||||||
232 | |||||||
233 | Timothée foucart, C<< |
||||||
234 | |||||||
235 | =head1 BUGS | ||||||
236 | |||||||
237 | Please report any bugs or feature requests to | ||||||
238 | C |
||||||
239 | L |
||||||
240 | I will be notified, and then you'll automatically be notified of progress on | ||||||
241 | your bug as I make changes. | ||||||
242 | |||||||
243 | =head1 ACKNOWLEDGEMENTS | ||||||
244 | |||||||
245 | =head1 COPYRIGHT & LICENSE | ||||||
246 | |||||||
247 | Copyright 2006 Timothée foucart, all rights reserved. | ||||||
248 | |||||||
249 | This program is free software; you can redistribute it and/or modify it | ||||||
250 | under the same terms as Perl itself. | ||||||
251 | |||||||
252 | =cut | ||||||
253 | |||||||
254 | 1 | 1 | 26076 | use warnings; | |||
1 | 4 | ||||||
1 | 30 | ||||||
255 | 1 | 1 | 5 | use strict; | |||
1 | 1 | ||||||
1 | 40 | ||||||
256 | require Exporter; | ||||||
257 | 1 | 1 | 27 | use vars qw(@ISA @EXPORT $VERSION); | |||
1 | 6 | ||||||
1 | 80 | ||||||
258 | @ISA = qw(Exporter); | ||||||
259 | @EXPORT=qw( cookies_file debug dbfile | ||||||
260 | get_all_result html logout | ||||||
261 | motif search update useragent language ); | ||||||
262 | |||||||
263 | 1 | 1 | 829 | use utf8; | |||
1 | 9 | ||||||
1 | 4 | ||||||
264 | 1 | 1 | 28 | use Carp; | |||
1 | 1 | ||||||
1 | 62 | ||||||
265 | 1 | 1 | 717 | use HTTP::Cookies; | |||
1 | 11876 | ||||||
1 | 34 | ||||||
266 | 1 | 1 | 1412 | use WWW::Mechanize; | |||
1 | 158951 | ||||||
1 | 52 | ||||||
267 | # | ||||||
268 | # | ||||||
269 | # -- | ||||||
270 | # | ||||||
271 | 1 | 1 | 504 | use Alias qw(attr); | |||
0 | |||||||
0 | |||||||
272 | use vars qw( $base $site $cookies_file $useragent $motif $debug $logout $html $dbfile $okdbfile $nbres $totalres %sstsav $fh $language %langhash); | ||||||
273 | # | ||||||
274 | # global var | ||||||
275 | my $fh; | ||||||
276 | my %sstsav; | ||||||
277 | my %langhash; | ||||||
278 | $langhash{'all'}='0'; | ||||||
279 | $langhash{'Albanian'}='42'; | ||||||
280 | $langhash{'Argentino'}='43'; | ||||||
281 | $langhash{'Bosnian'}='44'; | ||||||
282 | $langhash{'Brazilian_portuguese'}='13'; | ||||||
283 | $langhash{'Bulgarian' }='14'; | ||||||
284 | $langhash{'Bulgarian_English' }='15'; | ||||||
285 | $langhash{'Chines GB code' }='32'; | ||||||
286 | $langhash{'Chinese' }='16'; | ||||||
287 | $langhash{'Croatian' }='17'; | ||||||
288 | $langhash{'Czech' }='2'; | ||||||
289 | $langhash{'Danish' } ='12'; | ||||||
290 | $langhash{'Dutch/English' }='33'; | ||||||
291 | $langhash{'English' }='1'; | ||||||
292 | $langhash{'English - Hearing Impaired' }='34'; | ||||||
293 | $langhash{'English_German' }='18'; | ||||||
294 | $langhash{'Estonian'}='35'; | ||||||
295 | $langhash{'Finnish'}='5'; | ||||||
296 | $langhash{'French'}='6'; | ||||||
297 | $langhash{'German - Hearing Impaired' }='36'; | ||||||
298 | $langhash{'Germany' }='7'; | ||||||
299 | $langhash{'Greek' }='19'; | ||||||
300 | $langhash{'Hebrew' }='37'; | ||||||
301 | $langhash{'Hungarian/English'}='38'; | ||||||
302 | $langhash{'Hungary' }='8'; | ||||||
303 | $langhash{'Icelandic' }='20'; | ||||||
304 | $langhash{'Italy'}='3'; | ||||||
305 | $langhash{'Japanese'}='39'; | ||||||
306 | $langhash{'Kalle'}='21'; | ||||||
307 | $langhash{'Korean'}='22'; | ||||||
308 | $langhash{'Latvian'}='40'; | ||||||
309 | $langhash{'Lithuanian'}='45'; | ||||||
310 | $langhash{'Macedonian'}='41'; | ||||||
311 | $langhash{'Netherlands'}='10'; | ||||||
312 | $langhash{'Norwegian'}='23'; | ||||||
313 | $langhash{'Polish'}='4'; | ||||||
314 | $langhash{'Portuguese'}='24'; | ||||||
315 | $langhash{'Romanian'}='30'; | ||||||
316 | $langhash{'Russian'}='25'; | ||||||
317 | $langhash{'Serbian'}='29'; | ||||||
318 | $langhash{'Slovak'}='31'; | ||||||
319 | $langhash{'Slovenian'}='28'; | ||||||
320 | $langhash{'Spanish'}='9'; | ||||||
321 | $langhash{'Swedish'}='11'; | ||||||
322 | $langhash{'Turkish'}='26'; | ||||||
323 | $langhash{'other' }='27'; | ||||||
324 | # | ||||||
325 | # | ||||||
326 | sub new{ | ||||||
327 | my $Extratitles=shift; | ||||||
328 | my $classe= ref($Extratitles) || $Extratitles; | ||||||
329 | my $self={ }; | ||||||
330 | bless($self,$classe); | ||||||
331 | $self->_init(@_); | ||||||
332 | logout($self->{logout}); | ||||||
333 | #language($self->{language}); | ||||||
334 | return $self; | ||||||
335 | }; | ||||||
336 | sub _init{ | ||||||
337 | my $self= attr shift; | ||||||
338 | # | ||||||
339 | # -- init default values | ||||||
340 | # | ||||||
341 | $self->{base} = "http://titles.box.sk/"; | ||||||
342 | $self->{site} = "http://titles.box.sk/index.php"; | ||||||
343 | $self->{cookies_file} = "$ENV{HOME}"."/.cookies_sksb"; | ||||||
344 | $self->{useragent} = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"; | ||||||
345 | $self->{motif} = undef; | ||||||
346 | $self->{debug} = 1; | ||||||
347 | $self->{logout} = undef; | ||||||
348 | $self->{html} = "$ENV{HOME}"."/Extratitles_report.html"; | ||||||
349 | $self->{dbfile} = "$ENV{HOME}"."/Extratitles_db.db"; | ||||||
350 | $self->{okdbfile} = 0; | ||||||
351 | $self->{sstsav} ={}; | ||||||
352 | $self->{language} ='all'; | ||||||
353 | # | ||||||
354 | # -- replace forced values | ||||||
355 | # | ||||||
356 | if (@_) | ||||||
357 | { | ||||||
358 | my %param=@_; | ||||||
359 | while (my($x,$y) =each(%param)){$self->{$x}=$y;}; | ||||||
360 | } | ||||||
361 | return $self; | ||||||
362 | }; | ||||||
363 | |||||||
364 | sub useragent { | ||||||
365 | my $self =attr shift; | ||||||
366 | if (@_) {$useragent=shift;} | ||||||
367 | return $useragent; | ||||||
368 | } | ||||||
369 | |||||||
370 | sub dbfile { | ||||||
371 | my $self =attr shift; | ||||||
372 | if (@_) {$dbfile=shift;$okdbfile=1}; | ||||||
373 | if ($okdbfile==0) {return undef;}; | ||||||
374 | return $dbfile; | ||||||
375 | } | ||||||
376 | sub debug { | ||||||
377 | my $self =attr shift; | ||||||
378 | if (@_) {$debug=shift;} | ||||||
379 | return $debug; | ||||||
380 | } | ||||||
381 | sub _sstsav { | ||||||
382 | my $self =attr shift; | ||||||
383 | if (@_) {%sstsav=shift;} | ||||||
384 | return %sstsav; | ||||||
385 | } | ||||||
386 | sub get_all_result { | ||||||
387 | my $self =attr shift; | ||||||
388 | %sstsav=$self->_sstsav(); | ||||||
389 | return %sstsav; | ||||||
390 | } | ||||||
391 | sub cookies_file { | ||||||
392 | my $self =attr shift; | ||||||
393 | if (@_) {$cookies_file=shift;} | ||||||
394 | return $cookies_file; | ||||||
395 | } | ||||||
396 | sub motif { | ||||||
397 | my $self = attr shift; | ||||||
398 | if (@_) {$motif=shift}; | ||||||
399 | return $motif; | ||||||
400 | } | ||||||
401 | sub logout { | ||||||
402 | if (@_){$logout=shift; } | ||||||
403 | if ($logout) | ||||||
404 | { open(FH , ">>", $logout) or croak " can not open $logout : $!\n"; | ||||||
405 | $fh=(\*FH);} | ||||||
406 | else | ||||||
407 | { open (FH, ">&STDOUT" ) or croak "Can't dup STDOUT: $!"; | ||||||
408 | $fh=(\*STDOUT);}; | ||||||
409 | return $logout; | ||||||
410 | } | ||||||
411 | sub html { | ||||||
412 | my $self =attr shift; | ||||||
413 | if (@_) {$html=shift;} | ||||||
414 | else {$html=$self;}; | ||||||
415 | unless (-e ($html)) | ||||||
416 | { | ||||||
417 | print $fh "[DEBUG] html report file doesn't exists \n"; | ||||||
418 | print $fh "[DEBUG] default value is now : ".$self->{html}." \n"; | ||||||
419 | } | ||||||
420 | return $html; | ||||||
421 | } | ||||||
422 | sub _open_html{ | ||||||
423 | open(HTMLFILE,">>",$html) | ||||||
424 | or croak "can not create $html : $! \n"; | ||||||
425 | print HTMLFILE " report generated by suckSub perl module \n"; |
||||||
426 | print HTMLFILE "searching : ".motif()." on ".$site." \n"; |
||||||
427 | print HTMLFILE " ".localtime()." \n"; |
||||||
428 | return; | ||||||
429 | } | ||||||
430 | sub language{ | ||||||
431 | my $self =attr shift; | ||||||
432 | if (@_){$language=shift; | ||||||
433 | if (defined($langhash{$language})) | ||||||
434 | { | ||||||
435 | my $lang=$langhash{$language}; | ||||||
436 | printf "langage is now set to ".$language." [ ".$lang." ]\n" if ($debug); | ||||||
437 | return $lang;} | ||||||
438 | else | ||||||
439 | { croak "language $language is not recognized !\n";}; | ||||||
440 | }; | ||||||
441 | }; | ||||||
442 | sub update { | ||||||
443 | my $self =attr shift; | ||||||
444 | unless ($motif){croak "You must provide a string value to motif()....exit\n";}; | ||||||
445 | my $mech = WWW::Mechanize->new(agent=>$useragent, | ||||||
446 | cookie_jar => HTTP::Cookies->new( | ||||||
447 | file => $cookies_file, | ||||||
448 | autosave => 1, | ||||||
449 | ignore_discard => 0, | ||||||
450 | ), | ||||||
451 | stack_depth => 1, | ||||||
452 | ); | ||||||
453 | my $next=0;# next page indicator (0/1) | ||||||
454 | |||||||
455 | if ($html){_open_html();}; | ||||||
456 | print $fh "------------------------------------------------------------------------------------------------\n" if ($debug); | ||||||
457 | print $fh "[DEBUG] begin scan on $site at : ".localtime()."\n" if ($debug); | ||||||
458 | print $fh "[DEBUG] searching : ".$motif." on $site \n" if ($debug); | ||||||
459 | my $page = 1; # pagination | ||||||
460 | if ($debug) {print $fh "\n[DEBUG \t Extratitles PAGE $page]\n";}; | ||||||
461 | $mech->get('http://titles.box.sk/index.php?p=se&pas=as') or warn "[WARNING] http get problem on : $site !! \n"; | ||||||
462 | # launch advanced search research | ||||||
463 | $mech->form_name('as_form'); | ||||||
464 | $mech->select( 'jaz' , $langhash{$self->{language}} );#i.e. langage = french | ||||||
465 | $mech->field( 'z3' , $motif,1 ); | ||||||
466 | $mech->field( 'p', 'se',1 ); | ||||||
467 | $mech->click(); | ||||||
468 | if ($debug) { print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri()."]\n" if ($debug);}; | ||||||
469 | # so we parse all result page one by one | ||||||
470 | ($nbres,$next) = _parse_Extratitles($mech,$page); | ||||||
471 | printf $fh "[DEBUG] next page detected \n" if $debug; | ||||||
472 | $totalres=$nbres; | ||||||
473 | # | ||||||
474 | # verify if we need to change page to get next search results | ||||||
475 | # | ||||||
476 | while ($next>0) | ||||||
477 | { | ||||||
478 | $page = $page+1; | ||||||
479 | #http://titles.box.sk/index.php?pid=subt2&p=se&bp=40&bn=0&z3=e&jaz=6 | ||||||
480 | # | | | \langage | ||||||
481 | # | | \z3=search motif | ||||||
482 | # | \display range begin | ||||||
483 | # \number of subtitles to display | ||||||
484 | my $nbdisp=$page*20; | ||||||
485 | $mech->get( "http://titles.box.sk/index.php?pid=subt2&p=se&bp=20&bn=".$nbdisp."&z3=".$motif."&jaz=".$langhash{$self->{language}}) | ||||||
486 | or warn "get problem on page : $page : $! \n"; | ||||||
487 | if ($debug) { print $fh "[DEBUG \t PAGE : $page]\n";print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri() ."]\n";}; | ||||||
488 | ($nbres,$next) = _parse_Extratitles($mech,$page); | ||||||
489 | $totalres=$totalres+$nbres; | ||||||
490 | }; | ||||||
491 | |||||||
492 | # | ||||||
493 | print $fh "[DEBUG \t : $totalres found on $base]\n" if ($debug); | ||||||
494 | print $fh "[END]\n" if ($debug); | ||||||
495 | #print html report | ||||||
496 | if ($html) | ||||||
497 | { | ||||||
498 | $nbres=0; | ||||||
499 | while (my ($k,$v) =each(%sstsav)) | ||||||
500 | { | ||||||
501 | print HTMLFILE "".$v." \n"; |
||||||
502 | $nbres++; | ||||||
503 | } | ||||||
504 | |||||||
505 | } | ||||||
506 | #finish and close all open file(s) | ||||||
507 | if ($html) | ||||||
508 | { | ||||||
509 | print HTMLFILE " ".$nbres." result(s) found \n"; |
||||||
510 | print HTMLFILE " report finished at ".localtime()." \n"; |
||||||
511 | } | ||||||
512 | |||||||
513 | |||||||
514 | close HTMLFILE; | ||||||
515 | return; | ||||||
516 | }; | ||||||
517 | |||||||
518 | # | ||||||
519 | # ---local search if $dbfile exist | ||||||
520 | # | ||||||
521 | sub search { | ||||||
522 | my $self =attr shift; | ||||||
523 | unless ($motif){croak "You must provide a string value to motif() attribute....exit\n";}; | ||||||
524 | unless ($dbfile) { croak " no DB file defined : exit ... \n";}; | ||||||
525 | #html report | ||||||
526 | if ($html) | ||||||
527 | { | ||||||
528 | open(HTMLFILE,">>",$html) | ||||||
529 | or croak "can not create $html : $! \n"; | ||||||
530 | print HTMLFILE " local search on dm file : $dbfile \n"; |
||||||
531 | print HTMLFILE "searching : ".$motif." on ".$site." \n"; |
||||||
532 | print HTMLFILE " ".localtime()." \n"; |
||||||
533 | }; | ||||||
534 | #print html report | ||||||
535 | #local search --> print and finish html report | ||||||
536 | _search_dbm($dbfile); | ||||||
537 | return; | ||||||
538 | |||||||
539 | }; | ||||||
540 | # | ||||||
541 | #--- this function = to parse only one result page | ||||||
542 | # | ||||||
543 | sub _parse_Extratitles{ | ||||||
544 | my $mech=$_[0];my $page =$_[1]; | ||||||
545 | my $jnd=0; my $jnd2=0; | ||||||
546 | my $oktitle=0;my $okurl=0; | ||||||
547 | my $next_page_exists=0; | ||||||
548 | my $f_url; my $f_title; | ||||||
549 | my $lnk=$mech->find_all_links(); | ||||||
550 | my $nbl = $#{$lnk}; my $ind=0; | ||||||
551 | print $fh "[DEBUG] searching links on : ".$mech->uri()." ]\n" if ($debug); | ||||||
552 | |||||||
553 | # =4= rechercher les liens des reponses de la recherche | ||||||
554 | my @sstlist=[];my @ssturl=[];# memo array | ||||||
555 | for ( my $ind=0; $ind < $#{$lnk} ; $ind++) | ||||||
556 | { | ||||||
557 | # search and memorize the subtitle label | ||||||
558 | # can be fixed if site changes | ||||||
559 | # --Title links should have these syntax : | ||||||
560 | # --http://titles.box.sk/index.php?pid=subt2&p=i&rid= 207488 | ||||||
561 | # --Subtitle file must have an url text = "DOWNLOAD" | ||||||
562 | #search lovie name | ||||||
563 | if ( ($lnk->[$ind]->url() =~ m/(^?pid=subt2\&p=i\&rid=)([0-9]+$)/g ) | ||||||
564 | and ($lnk->[$ind]->text()!~m/MORE INFO/) | ||||||
565 | ) | ||||||
566 | { | ||||||
567 | push @sstlist,$lnk->[$ind]->text(); | ||||||
568 | print $fh "[FOUND MOVIE NAME]\n\t".$lnk->[$ind]->text()."\n" if $debug; | ||||||
569 | $f_title=scalar($lnk->[$ind]->text()); | ||||||
570 | $oktitle=1; | ||||||
571 | }; | ||||||
572 | # search subtitle url to download | ||||||
573 | # test if text() is f=defined avoid warning | ||||||
574 | # then if found, we can save in sstsav hash | ||||||
575 | if ( (defined($lnk->[$ind]->text()) and ($lnk->[$ind]->text() =~m/(\DOWNLOAD)/) ) | ||||||
576 | ) | ||||||
577 | { | ||||||
578 | push @ssturl,$lnk->[$ind]->url_abs(); | ||||||
579 | print $fh "[FOUND SUBTITLE LINK]\n\t". scalar($lnk->[$ind]->url_abs()) ."\n" if $debug; | ||||||
580 | $f_url=scalar($lnk->[$ind]->url_abs()); | ||||||
581 | $okurl=1; | ||||||
582 | if ($oktitle==1) | ||||||
583 | { | ||||||
584 | $sstsav{$f_url}=$f_title; | ||||||
585 | $oktitle=0;$okurl=0; | ||||||
586 | } | ||||||
587 | }; | ||||||
588 | if (defined($lnk->[$ind]->text()) and ($lnk->[$ind]->text() =~/>/) ) | ||||||
589 | {$next_page_exists=1;} | ||||||
590 | }; | ||||||
591 | # verify we get any result for the search request | ||||||
592 | if ( $#ssturl < 1) { print $fh " PAS DE RESULTAT pour $motif sur Extratitles\n";return (0,0);}; | ||||||
593 | #else save and print if $html | ||||||
594 | print $fh "[DEBUG] Found ". $#ssturl ." subtitles on page : ".$page."\n" if ($debug); | ||||||
595 | $nbres=$#ssturl; | ||||||
596 | _save_dbm(); | ||||||
597 | #and reinit sstsav | ||||||
598 | %sstsav={}; | ||||||
599 | return ($nbres,$next_page_exists); | ||||||
600 | }; | ||||||
601 | sub _save_dbm{ | ||||||
602 | my %xstsav; | ||||||
603 | use DB_File; | ||||||
604 | tie (%xstsav,'DB_File',$dbfile ) | ||||||
605 | or croak "can not use $dbfile : $!\n"; | ||||||
606 | while (my ($k, $v) = each %sstsav) | ||||||
607 | { $xstsav{$k}=$v; print $fh "[DEBUG][DBM] saving $v [$k] into db \n" if ($debug);}; | ||||||
608 | untie(%xstsav); | ||||||
609 | return; | ||||||
610 | }; | ||||||
611 | sub _search_dbm{ | ||||||
612 | use DB_File; | ||||||
613 | my %hashread; | ||||||
614 | my $nb_local_res; | ||||||
615 | unless (-e ($dbfile)) | ||||||
616 | {croak "[DEBUG SEARCH] db file ".$dbfile." not found ! \n";}; | ||||||
617 | tie(%hashread,'DB_File',$dbfile) | ||||||
618 | or croak "can not access : $dbfile : $!\n"; | ||||||
619 | if ($html) | ||||||
620 | { | ||||||
621 | print HTMLFILE " Searching : ".$motif." on local database : \n"; |
||||||
622 | print HTMLFILE " DBM file is :".$dbfile." \n"; |
||||||
623 | } | ||||||
624 | while (my ($k,$v)=each(%hashread)) | ||||||
625 | { | ||||||
626 | if ($v =~ m/$motif/i) | ||||||
627 | { | ||||||
628 | print $fh "[FOUND Libelle ] $v \n[FOUND LINK]". $base.$k ."\n" if $debug; | ||||||
629 | if ($html) | ||||||
630 | { | ||||||
631 | my $url=$k; | ||||||
632 | if ($k !~ m/http:\/\//im){my $url=$base.$k} | ||||||
633 | print HTMLFILE "".$v." \n"; |
||||||
634 | $nb_local_res++; | ||||||
635 | }; | ||||||
636 | }; | ||||||
637 | }; | ||||||
638 | untie(%hashread); | ||||||
639 | if ($html) | ||||||
640 | { | ||||||
641 | print HTMLFILE " [ ".$nb_local_res." result(s) found on local DB ] \n"; |
||||||
642 | print HTMLFILE " report finished at ".localtime()." \n"; |
||||||
643 | } | ||||||
644 | return; | ||||||
645 | }; | ||||||
646 | sub END{ | ||||||
647 | my $self =attr shift; | ||||||
648 | close HTMLFILE;close FH; | ||||||
649 | return; | ||||||
650 | }; | ||||||
651 | |||||||
652 | 1; # End of WWW-Sucksub::Extratitles |