File Coverage

blib/lib/HTML/Robot/Scrapper.pm

Criterion	Covered	Total	%
statement	1	3	33.3
branch			n/a
condition			n/a
subroutine	1	1	100.0
pod			n/a
total	2	4	50.0

line	stmt	sub	time	code
1				package HTML::Robot::Scrapper;
2	1	1	45591	use Moose;
	0
	0
3				#use Class::Load ':all';
4				use Data::Dumper;
5				use Data::Printer;
6				use Try::Tiny;
7				use HTML::Robot::Scrapper::Benchmark::Default;
8				use HTML::Robot::Scrapper::Log::Default;
9				use HTML::Robot::Scrapper::Parser::Default;
10				use HTML::Robot::Scrapper::Queue::Default;
11				use HTML::Robot::Scrapper::UserAgent::Default;
12				use HTML::Robot::Scrapper::Encoding::Default;
13
14				our $VERSION = '0.11';
15
16				=head1 ATTRIBUTES
17
18				=cut
19
20				=head2 reader
21
22				this attribute access your reader class instance
23
24				=cut
25				has reader => ( # rw accessor with no default; start() calls methods on it, so it must be set before start() runs
26				is => 'rw',
27				# default => sub {
28				#
29				# },
30				);
31
32				=head2 writer
33
34				this attribute accesses your writer class instance
35
36				=cut
37				has writer => ( # rw accessor; no default is defined in this class
38				is => 'rw',
39				# default => sub {
40				# },
41				);
42
43				=head2 benchmark
44
45				not ready, i want a catalyst type of method tree like debug for each method for each request
46
47				=cut
48				has benchmark => ( # rw accessor; start() calls method_start/method_finish on it
49				is => 'rw',
50				default => sub { # used when no benchmark is passed to the constructor
51				HTML::Robot::Scrapper::Benchmark::Default->new();
52				},
53				);
54				=head2 cache
55
56				The cache works with CHI; however, right now it is only useful for GET requests to specific urls.
57				Using the cache, you do not need to download the page each time, so it is good for development.
58
59				=cut
60				has cache => ( # rw accessor; no default, because the builder below is commented out
61				is => 'rw',
62				# default => sub {
63				# HTML::Robot::Scrapper::Cache::Default->new();
64				# },
65				);
66				=head2 log
67
68				the log is not ready yet however it will be log4perl
69
70				=cut
71				has log => ( # rw accessor for the log object
72				is => 'rw',
73				default => sub { # used when no log is passed to the constructor
74				HTML::Robot::Scrapper::Log::Default->new();
75				},
76				);
77				=head2 parser
78
79				The default parser reads content types:
80
81				- text/html with HTML::TreeBuilder::XPath
82
83				which is in file: lib/HTML/Robot/Scrapper/Parser/HTML/TreeBuilder/XPath.pm
84
85				- text/xml with XML::XPath
86
87				which is in file: lib/HTML/Robot/Scrapper/Parser/XML/XPath.pm
88
89				and the parser is:
90
91				-base: lib/HTML/Robot/Scrapper/Parser/Base.pm
92
93				override with:
94
95				my $robot = HTML::Robot::Scrapper->new (
96				....
97				log => {
98				base_class => 'HTML::Robot::Scrapper::Log::Base', #optional, your custom base class
99				class => 'Default' #or HTML::Robot::Scrapper::Log::Default
100				},
101				...
102				)
103
104				-default: lib/HTML/Robot/Scrapper/Parser/Default.pm
105
106				=cut
107				has parser => ( # rw accessor for the parser object
108				is => 'rw',
109				default => sub { # used when no parser is passed to the constructor
110				HTML::Robot::Scrapper::Parser::Default->new();
111				},
112				);
113				=head2 queue
114
115				base_class: lib/HTML/Robot/Scrapper/Queue/Base.pm
116
117				default class: lib/HTML/Robot/Scrapper/Queue/Default.pm (Simple Instance Array)
118
119				you can override the whole thing using a custom base_class, or simply use
120
121				a different class
122
123				my $robot = HTML::Robot::Scrapper->new (
124				....
125				queue => {
126				base_class => 'HTML::Robot::Scrapper::Queue::Base',
127				class => 'HTML::Robot::Scrapper::Queue::Default'
128				},
129				...
130				)
131
132				=cut
133				has queue => ( # rw accessor; start() pulls work items from it with queue_get_item
134				is => 'rw',
135				default => sub { # used when no queue is passed to the constructor
136				HTML::Robot::Scrapper::Queue::Default->new();
137				},
138				);
139
140				=head2 useragent
141				=cut
142				has useragent => ( # rw accessor; start() calls visit($item) on it for every queued item
143				is => 'rw',
144				default => sub { # used when no useragent is passed to the constructor
145				HTML::Robot::Scrapper::UserAgent::Default->new();
146				},
147				);
148
149				=head2 encoding
150				=cut
151				has encoding => ( # rw accessor for the encoding object
152				is => 'rw',
153				default => sub { # used when no encoding is passed to the constructor
154				HTML::Robot::Scrapper::Encoding::Default->new();
155				},
156				);
157
158
159				has custom_attrs => ( # names of the attributes that the before 'start' modifier walks to hand out the robot reference
160				is => 'rw',
161				default => sub {
162				return [qw/benchmark cache log parser queue useragent encoding/]; # reader is not listed; before 'start' handles it separately
163				}
164				);
165
166				=head2 new
167
168				my $robot = HTML::Robot::Scrapper->new (
169				reader => HTML::Robot::Scrapper::Reader::TestReader->new,
170				writer => HTML::Robot::Scrapper::Writer::TestWriter->new,
171				# cache => CHI->new(
172				# driver => 'BerkeleyDB',
173				# root_dir => dir( getcwd() , "cache" ),
174				# ),
175				# log => HTML::Robot::Scrapper::Log::Default->new(),
176				# parser => HTML::Robot::Scrapper::Parser::Default->new(),
177				# queue => HTML::Robot::Scrapper::Queue::Default->new(),
178				# useragent => HTML::Robot::Scrapper::UserAgent::Default->new(),
179				# encoding => HTML::Robot::Scrapper::Encoding::Default->new(),
180				);
181
182				=cut
183
184
185
186				=head2 before 'start'
187
188				- give access to this class inside other custom classes
189
190				=cut
191
192				before 'start' => sub { # Moose method modifier: runs ahead of start() each time start() is called
193				my ( $self ) = @_;
194				foreach my $attr ( @{ $self->custom_attrs } ) {
195				#hand this robot to every listed component that is defined and implements a "robot" method
196				$self->$attr->robot( $self ) if defined $self->$attr and $self->$attr->can( "robot" );
197				}
198				$self->reader->robot( $self ); # the reader gets the reference unconditionally (no defined/can check here)
199				};
200
201				sub start { # crawl loop: reader->on_start, then one pass per queued item, then reader->on_finish
202				my ( $self ) = @_;
203				$self->reader->on_start( $self );
204				my $counter = 0;
205				while ( my $item = $self->queue->queue_get_item ) { # ends when queue_get_item returns a false value
206				$self->benchmark->method_start('finish_in'); # closed by method_finish('finish_in', ...) at the end of this iteration
207
208				print '--[ '.$counter++.' ]------------------------------------------------------------------------------'."\n";
209				print ' url: '. $item->{ url }."\n" if exists $item->{ url };
210				my $method = $item->{ method }; # name of the reader method called for this item further down
211				my $res = $self->useragent->visit($item); # only the "headers" key of $res is read in this sub
212
213				#reset passed_key_values, then set them from the item when it has any
214				$self->reader->passed_key_values( {} );
215				$self->reader->passed_key_values( $item->{passed_key_values} )
216				if exists $item->{passed_key_values};
217
218				#reset headers, then set them from the response when it has any
219				$self->reader->headers( {} );
220				$self->reader->headers( $res->{headers} )
221				if exists $res->{headers};
222
223				#TODO: set the cookies in $self->reader->cookies
224				# so that one and the same cookie can be used and updated across requests
225
226
227				$self->benchmark->method_start( $method );
228				try { # Try::Tiny: a die inside the reader method is reported with warn and the loop continues
229				$self->reader->$method( );
230				} catch {
231				warn "ERROR on reader->$method: $_";
232				};
233				$self->benchmark->method_finish( $method );
234
235				$self->benchmark->method_finish('finish_in', 'Total: ' );
236				}
237				$self->reader->on_finish( );
238				}
239
240				=head1 NAME
241
242				HTML::Robot::Scrapper - Your robot to parse webpages
243
244				=head1 SYNOPSIS
245
246				See a working example under the module: WWW::Tabela::Fipe ( search on github ).
247
248				The class
249
250				HTML::Robot::Scrapper::Parser::Default
251
252				handles only text/html and text/xml by default
253
254				So i need to add an extra option for text/plain and tell it to use
255
256				the same method that already parses text/html, here is an example:
257
258				* im using the code from the original as base class for this:
259
260				HTML::Robot::Scrapper::Parser::Default
261
262				Here i will redefine that class and tell my $robot to favor it
263
264				...
265				parser => WWW::Tabela::Fipe::Parser->new,
266				...
267
268				See below:
269
270				package WWW::Tabela::Fipe::Parser;
271				use Moo;
272
273				has [qw/engine robot/] => ( is => 'rw' );
274
275				with('HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath');
276				with('HTML::Robot::Scrapper::Parser::XML::XPath');
277
278				sub content_types {
279				my ( $self ) = @_;
280				return {
281				'text/html' => [
282				{
283				parse_method => 'parse_xpath',
284				description => q{
285				The method above 'parse_xpath' is inside class:
286				HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath
287				},
288				}
289				],
290				'text/plain' => [
291				{
292				parse_method => 'parse_xpath',
293				description => q{
294				esse site da fipe responde em text/plain e eu preciso parsear esse content type.
295				por isso criei esta classe e passei ela como parametro, sobreescrevendo a classe
296				HTML::Robot::Scrapper::Parser::Default
297				},
298				}
299				],
300				'text/xml' => [
301				{
302				parse_method => 'parse_xml'
303				},
304				],
305				};
306				}
307
308				1;
309
310				package FIPE;
311
312				use HTML::Robot::Scrapper;
313				#use CHI;
314				use HTTP::Tiny;
315				use HTTP::CookieJar;
316				use WWW::Tabela::Fipe;
317				use WWW::Tabela::FipeWrite;
318				#use WWW::Tabela::Fipe::Parser;
319				use HTML::Robot::Scrapper::UserAgent::Default;
320
321				my $robot = HTML::Robot::Scrapper->new(
322				reader => WWW::Tabela::Fipe->new,
323				writer => WWW::Tabela::FipeWrite->new,
324				# cache =>
325				# CHI->new(
326				# driver => 'BerkeleyDB',
327				# root_dir => "/home/catalyst/WWW-Tabela-Fipe/cache/",
328				# ),
329				parser => WWW::Tabela::Fipe::Parser->new, #custom para tb fipe. pois eles respondem com Content type text/plain
330				useragent => HTML::Robot::Scrapper::UserAgent::Default->new(
331				ua => HTTP::Tiny->new(
332				cookie_jar => HTTP::CookieJar->new,
333				agent => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0'
334				),
335
336				)
337				);
338
339				$robot->start();
340
341				=head1 DESCRIPTION
342
343				This crawler has been created to be extensible, and scalable with a redis queue.
344
345				The main idea is: i need a queue of urls to be crawled, it can be an array living during
346
347				my instance (not scalable)... or it can be a Redis queue ( scalable ), being accessed by
348
349				many HTML::Robot::Scrapper instances.
350
351				Each request inserted into the queue is supposed to be independent. So this thing can scale. I mean,
352
353				Suppose i need to create an object using stuff from page1, page2 and page3... that will be 3 requests
354
355				so, the first request will access page1 and collect data into $collected_data, then, i will append another
356
357				request for page2 with $collected_data from page 1. So the request for page2 will collect some more data
358
359				and merge with $collected_data from page1 generating $collected_data_from_page1_and_page2, and then i will insert
360
361				a new request into my queue for page3 that will collect data and merge with $collected_data_from_page1_and_page2
362
363				and create the final object: $collected_data_complete.
364
365
366				Basically, you need to create a
367
368				- reader: to read/parse your webpages
369
370				and a
371
372				- writer: to save data your reader collected.
373
374				You 'might' need to add other content types also, creating your custom class based on:
375
376				HTML::Robot::Scrapper::Parser::Default
377
378				See it and you will understand.. by default it handles:
379
380				- text/html
381				- text/xml
382
383				=head1 READER ( you create this )
384
385				Reader: It's where the parsing logic for a specific site lives.
386
387				You customize the reader telling it where the nodes are, etc..
388
389				The reader class is where you create your parser.
390
391				=head2 WRITER ( you create this )
392
393				Writer: It's the class that will save the data the reader collects.
394
395				ie: You can create a method "save" that receives an object and simply writes into your DB.
396
397				Or you can make it write into DB + elastic search .. etc.. whatever you want
398
399
400				=head1 CONTENT TYPES AND PARSING METHODS ( you might need to extend this )
401
402				For example, after making a request call ( HTML::Robot::Scrapper::UserAgent::Default )
403
404				it will need to parse data.. and will use the response content type to parse that data
405
406				by default the class that handles that is:
407
408				package HTML::Robot::Scrapper::Parser::Default;
409				use Moose;
410
411				has [qw/engine robot/] => ( is => 'rw' );
412
413				with('HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath'); #gives parse_xpath
414				with('HTML::Robot::Scrapper::Parser::XML::XPath'); #gives parse_xml
415
416				sub content_types {
417				my ( $self ) = @_;
418				return {
419				'text/html' => [
420				{
421				parse_method => 'parse_xpath',
422				description => q{
423				The method above 'parse_xpath' is inside class:
424				HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath
425				},
426				}
427				],
428				'text/xml' => [
429				{
430				parse_method => 'parse_xml'
431				},
432				],
433				};
434				}
435
436				1;
437
438				WWW::Tabela::FIPE has a custom Parser class and you can see it as an example.
439
440				If you need to download images, you will need to create a custom parser class adding 'image/png' as content type for example.
441
442				=head1 QUEUE OF REQUESTS
443
444				Another example is the Queue system, it has an api: HTML::Robot::Scrapper::Queue::Base and by default
445
446				uses: HTML::Robot::Scrapper::Queue::Array which works fine for 1 local instance. However, lets say i want a REDIS queue, so i could
447
448				implement HTML::Robot::Scrapper::Queue::Redis and make the crawler access a remote queue.. this way i can share a queue between many crawlers independently.
449
450				Just so you guys know, i have a redis module almost ready, it needs a little refactoring because it's from another personal project. It will be released asap when i get time.
451
452				So, if that does not fit you, or you want something else to handle those content types, just create a new class and pass it on to the HTML::Robot::Scrapper constructor. ie:
453
454				see the SYNOPSIS
455
456				By default it uses HTTP Tiny and useragent related stuff is in:
457
458				HTML::Robot::Scrapper::UserAgent::Default
459
460				=head1 Project Status
461
462				The crawling works as expected, and works great. And the api will not change probably.
463
464				Ideas are welcome! You are welcome to contribute.
465
466				=head1 TODO
467
468				Implement the REDIS Queue to give as option for the Array queue. Array queue runs local/per instance.. and the redis queue can be shared and accessed by multiple machines!
469
470				Still need to implement the Log, proper Benchmark with subroutine tree and timing.
471
472				Allow parameters to be passed in to UserAgent (HTTP::Tiny on this case)
473
474				Better tests and docs.
475
476				=head1 Example 1 - Append some urls and extract some data
477
478				On this first example, it shows how to make a simple crawler... by simple i mean simple GET requests following urls... and grabbing some data.
479
480				package HTML::Robot::Scrapper::Reader::TestReader;
481				use Moose;
482				with 'HTML::Robot::Scrapper::Reader';
483				use Data::Printer;
484				use Digest::SHA qw(sha1_hex);
485
486				## The commented stuff is useful as example
487
488				has startpage => (
489				is => 'rw',
490				default => sub { return 'http://www.bbc.co.uk/'} ,
491				);
492
493				has array_of_data => ( is => 'rw', default => sub { return []; } );
494
495				has counter => ( is => 'rw', default => sub { return 0; } );
496
497				sub on_start {
498				my ( $self ) = @_;
499				$self->append( search => $self->startpage );
500				$self->append( search => 'http://www.zap.com.br/' );
501				$self->append( search => 'http://www.uol.com.br/' );
502				$self->append( search => 'http://www.google.com/' );
503				}
504
505				sub search {
506				my ( $self ) = @_;
507				my $title = $self->robot->parser->tree->findvalue( '//title' );
508				my $h1 = $self->robot->parser->tree->findvalue( '//h1' );
509				warn $title;
510				warn p $self->robot->useragent->url ;
511				push( @{ $self->array_of_data } ,
512				{ title => $title, url => $self->robot->useragent->url, h1 => $h1 }
513				);
514				}
515
516				sub on_link {
517				my ( $self, $url ) = @_;
518				return if $self->counter( $self->counter + 1 ) > 3;
519				if ( $url =~ m{^http://www.bbc.co.uk}ig ) {
520				$self->prepend( search => $url ); # prepend url to the list
521				}
522				}
523
524
525				sub detail {
526				my ( $self ) = @_;
527				}
528
529				sub on_finish {
530				my ( $self ) = @_;
531				$self->robot->writer->save_data( $self->array_of_data );
532				}
533
534				1;
535
536				=head1 Example 2 - Tabela FIPE ( append custom request calls )
537
538				See the working version at: https://github.com/hernan604/WWW-Tabela-Fipe
539
540				This example shows an asp website that has those '__EVENTVALIDATION' and '__VIEWSTATE' fields, which must be sent back again on each request... here is the example of such a crawler for such a website...
541
542				This example also demonstrates how one could easily login into a website and crawl it also.
543
544				package WWW::Tabela::Fipe;
545				use Moose;
546				with 'HTML::Robot::Scrapper::Reader';
547				use Data::Printer;
548				use utf8;
549				use HTML::Entities;
550				use HTTP::Request::Common qw(POST);
551
552				has [ qw/marcas viewstate eventvalidation/ ] => ( is => 'rw' );
553
554				has veiculos => ( is => 'rw' , default => sub { return []; });
555				has referer => ( is => 'rw' );
556
557				sub start {
558				my ( $self ) = @_;
559				}
560
561				has startpage => (
562				is => 'rw',
563				default => sub {
564				return [
565				{
566				tipo => 'moto',
567				url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?azxp=1&v=m&p=52'
568				},
569				{
570				tipo => 'carro',
571				url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?p=51'
572				},
573				{
574				tipo => 'caminhao',
575				url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?v=c&p=53'
576				},
577				]
578				},
579				);
580
581				sub on_start {
582				my ( $self ) = @_;
583				foreach my $item ( @{ $self->startpage } ) {
584				$self->append( search => $item->{ url }, {
585				passed_key_values => {
586				tipo => $item->{ tipo },
587				referer => $item->{ url },
588				}
589				} );
590				}
591				}
592
593				sub _headers {
594				my ( $self , $url, $form ) = @_;
595				return {
596				'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
597				'Accept-Encoding' => 'gzip, deflate',
598				'Accept-Language' => 'en-US,en;q=0.5',
599				'Cache-Control' => 'no-cache',
600				'Connection' => 'keep-alive',
601				'Content-Length' => length( POST('url...', [], Content => $form)->content ),
602				'Content-Type' => 'application/x-www-form-urlencoded; charset=utf-8',
603				'DNT' => '1',
604				'Host' => 'www.fipe.org.br',
605				'Pragma' => 'no-cache',
606				'Referer' => $url,
607				'User-Agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:20.0) Gecko/20100101 Firefox/20.0',
608				'X-MicrosoftAjax' => 'Delta=true',
609				};
610				}
611
612				sub _form {
613				my ( $self, $args ) = @_;
614				return [
615				ScriptManager1 => $args->{ script_manager },
616				__ASYNCPOST => 'true',
617				__EVENTARGUMENT => '',
618				__EVENTTARGET => $args->{ event_target },
619				__EVENTVALIDATION => $args->{ event_validation },
620				__LASTFOCUS => '',
621				__VIEWSTATE => $args->{ viewstate },
622				ddlAnoValor => ( !exists $args->{ano} ) ? 0 : $args->{ ano },
623				ddlMarca => ( !exists $args->{marca} ) ? 0 : $args->{ marca },
624				ddlModelo => ( !exists $args->{modelo} ) ? 0 : $args->{ modelo },
625				ddlTabelaReferencia => 154,
626				txtCodFipe => '',
627				];
628				}
629
630				sub search {
631				my ( $self ) = @_;
632				my $marcas = $self->tree->findnodes( '//select[@name="ddlMarca"]/option' );
633				my $viewstate = $self->tree->findnodes( '//form[@id="form1"]//input[@id="__VIEWSTATE"]' )->get_node->attr('value');
634				my $event_validation = $self->tree->findnodes( '//form[@id="form1"]//input[@id="__EVENTVALIDATION"]' )->get_node->attr('value');
635				foreach my $marca ( $marcas->get_nodelist ) {
636				my $form = $self->_form( {
637				script_manager => 'UdtMarca\|ddlMarca',
638				event_target => 'ddlMarca',
639				event_validation=> $event_validation,
640				viewstate => $viewstate,
641				marca => $marca->attr( 'value' ),
642				} );
643				$self->prepend( busca_marca => 'url' , {
644				passed_key_values => {
645				marca => $marca->as_text,
646				marca_id => $marca->attr( 'value' ),
647				tipo => $self->robot->reader->passed_key_values->{ tipo },
648				referer => $self->robot->reader->passed_key_values->{referer },
649				},
650				request => [
651				'POST',
652				$self->robot->reader->passed_key_values->{ referer },
653				{
654				headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ),
655				content => POST('url...', [], Content => $form)->content,
656				}
657				]
658				} );
659				}
660				}
661
662				sub busca_marca {
663				my ( $self ) = @_;
664				my ( $captura1, $viewstate ) = $self->robot->useragent->content =~ m/hiddenField\\|__EVENTTARGET(.+)__VIEWSTATE\\|([^\\|]+)\\|/g;
665				my ( $captura_1, $event_validation ) = $self->robot->useragent->content =~ m/hiddenField\\|__EVENTTARGET(.+)__EVENTVALIDATION\\|([^\\|]+)\\|/g;
666				my $modelos = $self->tree->findnodes( '//select[@name="ddlModelo"]/option' );
667				foreach my $modelo ( $modelos->get_nodelist ) {
668
669
670				next unless $modelo->as_text !~ m/selecione/ig;
671				my $kv={};
672				$kv->{ modelo_id } = $modelo->attr( 'value' );
673				$kv->{ modelo } = $modelo->as_text;
674				$kv->{ marca_id } = $self->robot->reader->passed_key_values->{ marca_id };
675				$kv->{ marca } = $self->robot->reader->passed_key_values->{ marca };
676				$kv->{ tipo } = $self->robot->reader->passed_key_values->{ tipo };
677				$kv->{ referer } = $self->robot->reader->passed_key_values->{ referer };
678				my $form = $self->_form( {
679				script_manager => 'updModelo\|ddlModelo',
680				event_target => 'ddlModelo',
681				event_validation=> $event_validation,
682				viewstate => $viewstate,
683				marca => $kv->{ marca_id },
684				modelo => $kv->{ modelo_id },
685				} );
686				$self->prepend( busca_modelo => '', {
687				passed_key_values => $kv,
688				request => [
689				'POST',
690				$self->robot->reader->passed_key_values->{ referer },
691				{
692				headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ),
693				content => POST( 'url...', [], Content => $form )->content,
694				}
695				]
696				} );
697				}
698				}
699
700				sub busca_modelo {
701				my ( $self ) = @_;
702				my $anos = $self->tree->findnodes( '//select[@name="ddlAnoValor"]/option' );
703				foreach my $ano ( $anos->get_nodelist ) {
704				my $kv = {};
705				$kv->{ ano_id } = $ano->attr( 'value' );
706				$kv->{ ano } = $ano->as_text;
707				$kv->{ modelo_id } = $self->robot->reader->passed_key_values->{ modelo_id };
708				$kv->{ modelo } = $self->robot->reader->passed_key_values->{ modelo };
709				$kv->{ marca_id } = $self->robot->reader->passed_key_values->{ marca_id };
710				$kv->{ marca } = $self->robot->reader->passed_key_values->{ marca };
711				$kv->{ tipo } = $self->robot->reader->passed_key_values->{ tipo };
712				$kv->{ referer } = $self->robot->reader->passed_key_values->{ referer };
713				next unless $ano->as_text !~ m/selecione/ig;
714
715				my ( $captura1, $viewstate ) = $self->robot->useragent->content =~ m/hiddenField\\|__EVENTTARGET(.*)__VIEWSTATE\\|([^\\|]+)\\|/g;
716				my ( $captura_1, $event_validation ) = $self->robot->useragent->content =~ m/hiddenField\\|__EVENTTARGET(.*)__EVENTVALIDATION\\|([^\\|]+)\\|/g;
717				my $form = $self->_form( {
718				script_manager => 'updAnoValor\|ddlAnoValor',
719				event_target => 'ddlAnoValor',
720				event_validation=> $event_validation,
721				viewstate => $viewstate,
722				marca => $kv->{ marca_id },
723				modelo => $kv->{ modelo_id },
724				ano => $kv->{ ano_id },
725				} );
726
727				$self->prepend( busca_ano => '', {
728				passed_key_values => $kv,
729				request => [
730				'POST',
731				$self->robot->reader->passed_key_values->{ referer },
732				{
733				headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ),
734				content => POST( 'url...', [], Content => $form )->content,
735				}
736				]
737				} );
738				}
739				}
740
741				sub busca_ano {
742				my ( $self ) = @_;
743				my $item = {};
744				$item->{ mes_referencia } = $self->tree->findvalue('//span[@id="lblReferencia"]') ;
745				$item->{ cod_fipe } = $self->tree->findvalue('//span[@id="lblCodFipe"]');
746				$item->{ marca } = $self->tree->findvalue('//span[@id="lblMarca"]');
747				$item->{ modelo } = $self->tree->findvalue('//span[@id="lblModelo"]');
748				$item->{ ano } = $self->tree->findvalue('//span[@id="lblAnoModelo"]');
749				$item->{ preco } = $self->tree->findvalue('//span[@id="lblValor"]');
750				$item->{ data } = $self->tree->findvalue('//span[@id="lblData"]');
751				$item->{ tipo } = $self->robot->reader->passed_key_values->{ tipo } ;
752				warn p $item;
753
754				push( @{$self->veiculos}, $item );
755				}
756
757				sub on_link {
758				my ( $self, $url ) = @_;
759				}
760
761				sub on_finish {
762				my ( $self ) = @_;
763				warn "Terminou.... exportando dados.........";
764				$self->robot->writer->write( $self->veiculos );
765				}
766
767				=head1 DESCRIPTION
768
769				=head1 AUTHOR
770
771				Hernan Lopes
772				CPAN ID: HERNAN
773				perldelux / movimentoperl
774				hernan@cpan.org
775				http://github.com/hernan604
776
777				=head1 COPYRIGHT
778
779				This program is free software; you can redistribute
780				it and/or modify it under the same terms as Perl itself.
781
782				The full text of the license can be found in the
783				LICENSE file included with this module.
784
785
786				=head1 SEE ALSO
787
788				perl(1).
789
790				=cut
791
792				#################### main pod documentation end ###################
793
794
795				1;
796				# The preceding line will help the module return a true value
797