File Coverage

blib/lib/Finance/QuoteOptions.pm
Criterion Covered Total %
statement 18 429 4.2
branch 0 194 0.0
condition 0 92 0.0
subroutine 6 26 23.0
pod 18 20 90.0
total 42 761 5.5


tag to tag
line stmt bran cond sub pod time code
1             #
2             # Finance::QuoteOptions Module
3             # Extract options prices and series information from the web.
4             #
5             # (C) Copyright 2007-2010 Kirk Bocek
6             # Version 0.20 Contributions by Dan Dascalescu
7             #
8             package Finance::QuoteOptions;
9              
10             #require 5.6.1;
11 1     1   6952 use 5.006001;
  1         3  
  1         36  
12 1     1   4 use strict;
  1         2  
  1         35  
13 1     1   1398 use WWW::Mechanize;
  1         385636  
  1         48  
14             #use LWP::UserAgent; #See Changes document
15 1     1   11 use HTML::TokeParser;
  1         2  
  1         4577  
16              
17             # set the version for version checking
18             our $VERSION = 0.23;
19              
20             ############################
21             # Start of class definitions
22             ############################
23              
24             sub new {
25 0     0 1   my $class = shift;
26 0           my $self = {};
27 0           $self->{source} = 'yahoo';
28 0           $self->{symbol} = undef;
29 0           $self->{proxy} = undef;
30              
31 0 0         $self->{symbol} = shift if @_; #Set symbol if provided
32 0 0         $self->{symbol} = uc $self->{symbol} if $self->{symbol};
33              
34 0           bless ($self, $class);
35 0           return $self;
36             }
37              
38             sub symbol {
39             #Set or return target symbol
40 0     0 1   my $self = shift;
41 0 0         if (not @_) {
42 0           return $self->{symbol};
43             }
44 0           $self->{symbol} = shift;
45 0           $self->{data} = [];
46 0           $self->{success} = undef;
47 0           $self->{status} = undef;
48 0           $self->{response} = undef;
49             }
50              
51             sub source {
52             #Set or return data source
53             #Only 'yahoo' or 'cboe' is accepted
54             #Set source to 'yahoo' if anything else is provided
55 0     0 1   my $self = shift;
56              
57 0 0         return $self->{source} unless @_;
58              
59 0           my $param = shift;
60 0           $self->{source} = 'yahoo';
61 0 0         $self->{source} = 'cboe' if lc($param) eq 'cboe';
62 0           return $self->{source};
63             }
64              
65             sub retrieve {
66             #get data
67 0     0 1   my ($self, $expirations) = @_;
68 0 0         return 0 unless $self->{symbol};
69 0 0         if ($self->{source} eq 'cboe') {
70 0           $self->getcboedata();
71             } else {
72             #Yahoo is the default
73 0           $self->getyahoodata($expirations);
74             }
75 0           return $self->{success};
76             }
77              
78             sub expirations {
79             #Return arrayref of all expiration dates
80 0     0 1   my $self = shift;
81 0           my $dates = [];
82 0           push @$dates, $_->{exp} foreach @{$self->{data}};
  0            
83 0           return $dates;
84             }
85              
86             sub calls {
87             #Return arrayref with all calls for a given expiration
88             #If param is 6 or 8 characters then it's an expiration date
89             #3 or fewer characters and it's number of expirations out
90             #Date can be ###, YYYYMM or YYYYMMDD
91 0     0 1   my $self = shift;
92 0           my $exp = shift;
93 0 0 0       return if not defined $exp or $exp < 0;
94             #Check if too many expirations out:
95 0 0 0       return if length($exp) < 4 and $exp > $#{$self->{data}};
  0            
96             #If not number of exp out, then param must be 6 or 8 chars long
97 0 0 0       return if length($exp) > 3 and length($exp) !~ /^[68]$/;
98              
99 0           $exp += 0; #Make sure it's numeric
100 0 0         return $self->{data}->[$exp]->{calls} if length $exp < 4;
101             #Param is date
102 0           foreach (@{$self->{data}}) {
  0            
103 0 0 0       return $_->{calls} if length $exp == 6 and $exp == substr($_->{exp},0,6);
104 0 0 0       return $_->{calls} if length $exp == 8 and $exp == $_->{exp};
105             }
106 0           return;
107             }
108              
109             sub puts {
110             #Return all puts for a given expiration
111             #See calls() above
112 0     0 1   my $self = shift;
113 0           my $exp = shift;
114 0 0 0       return if not defined $exp or $exp < 0;
115 0 0 0       return if length($exp) < 4 and $exp > $#{$self->{data}};
  0            
116 0 0 0       return if length($exp) > 3 and length($exp) !~ /^[68]$/;
117              
118 0           $exp += 0; #Make sure it's numeric
119 0 0         return $self->{data}->[$exp]->{puts} if length $exp < 4;
120 0           foreach (@{$self->{data}}) {
  0            
121 0 0 0       return $_->{puts} if length $exp == 6 and $exp == substr($_->{exp},0,6);
122 0 0 0       return $_->{puts} if length $exp == 8 and $exp == $_->{exp};
123             }
124 0           return;
125             }
126              
127             sub callsymbols {
128             #Return arrayref with all call symbols for a given expiration
129 0     0 1   my $self = shift;
130 0           my $exp = shift;
131 0 0         return if $exp < 0;
132 0 0 0       return unless defined $exp and $exp <= $#{$self->data};
  0            
133 0           $exp+=0;
134              
135 0           my $ret = [];
136 0           push @$ret, $_->{symbol} foreach @{$self->{data}->[$exp]->{calls}};
  0            
137 0           return $ret;
138             }
139              
140             sub putsymbols {
141             #Return arrayref with all put symbols for a given expiration
142 0     0 1   my $self = shift;
143 0           my $exp = shift;
144 0 0         return if $exp < 0;
145 0 0 0       return unless defined $exp and $exp <= $#{$self->data};
  0            
146 0           $exp+=0;
147              
148 0           my $ret = [];
149 0           push @$ret, $_->{symbol} foreach @{$self->{data}->[$exp]->{puts}};
  0            
150 0           return $ret;
151             }
152              
153             sub callstrikes {
154             #Return arrayref with all call strike prices for a given expiration
155 0     0 1   my $self = shift;
156 0           my $exp = shift;
157 0 0         return if $exp < 0;
158 0 0 0       return unless defined $exp and $exp <= $#{$self->data};
  0            
159 0           $exp+=0;
160              
161 0           my $ret = [];
162 0           push @$ret, $_->{strike} foreach @{$self->{data}->[$exp]->{calls}};
  0            
163 0           return $ret;
164             }
165              
166             sub putstrikes {
167             #Return arrayref with all put strike prices for a given expiration
168 0     0 1   my $self = shift;
169 0           my $exp = shift;
170 0 0         return if $exp < 0;
171 0 0 0       return unless defined $exp and $exp <= $#{$self->data};
  0            
172 0           $exp+=0;
173              
174 0           my $ret = [];
175 0           push @$ret, $_->{strike} foreach @{$self->{data}->[$exp]->{puts}};
  0            
176 0           return $ret;
177             }
178              
179             #-------------------------------------------------------------------
180             sub option {
181             #Retrieve a single option
182 0     0 1   my $self = shift;
183 0           my $sym = shift;
184 0 0         return unless $sym;
185              
186 0           my $ret = undef;
187 0           my $date = undef;
188 0           my $opt = undef;
189 0           MAIN: for my $exp (@{$self->{data}}) {
  0            
190 0           $date = $exp->{exp};
191 0           for my $o (@{$exp->{calls}}) {
  0            
192 0 0         if (lc $o->{symbol} eq lc $sym) {
193 0           $opt = $o;
194 0           last MAIN;
195             }
196             }
197 0           for my $o (@{$exp->{puts}}) {
  0            
198 0 0         if (lc $o->{symbol} eq lc $sym) {
199 0           $opt = $o;
200 0           last MAIN;
201             }
202             }
203             }
204             #Copy the found option to a new annonymous hash
205             #Since we need to add the {exp} key
206 0 0         if ($opt) {
207 0           $ret = {};
208 0           %$ret = %$opt;
209 0           $ret->{exp} = $date;
210             }
211 0           return $ret;
212             }
213              
214             sub success {
215             #Set or retrieve success
216 0     0 1   my $self = shift;
217 0           my $stat = shift;
218 0 0         if (defined $stat) {
219 0           $self->{success} = $stat;
220             }
221 0           return $self->{success};
222             }
223              
224             sub status {
225             #Set or retrieve status
226 0     0 1   my $self = shift;
227 0           my $stat = shift;
228 0 0         $self->{status} = $stat if defined $stat;
229 0           return $self->{status};
230             }
231              
232             sub response {
233             #Set or retrieve response
234 0     0 1   my $self = shift;
235 0           my $stat = shift;
236 0 0         $self->{response} = $stat if defined $stat;
237 0           return $self->{response};
238             }
239              
240             sub proxy {
241             #Set or retrieve proxy setting
242 0     0 1   my $self = shift;
243 0           my $stat = shift;
244 0 0         $self->{proxy} = $stat if defined $stat;
245 0           return $self->{proxy};
246             }
247              
248             sub data {
249             #Return reference to data hash
250 0     0 1   my $self = shift;
251 0           return $self->{data};
252             }
253              
254             sub version {
255             #Return version number
256 0     0 1   my $self = shift;
257 0           return $VERSION;
258             }
259              
260             #-------------------------------------------------------------------
261             sub getyahoodata {
262             #
263             # Main query page:
264             # http://finance.yahoo.com/q/op?s=DIA
265             # Additional expirations:
266             # http://finance.yahoo.com/q/op?s=DIA&m=2007-06
267             #
268             # The main query page yields options for only the next expiration.
269             # At the top of those tables is a list of other expiration months.
270             # Generate the URLs for those additional pages and visit them
271             # in turn to get all the options data.
272             #
273 0     0 0   my $self = shift;
274 0   0       my $expirations = shift || -1; # how many expirations to retrieve, < 0 means all
275 0           my $q = LWP::UserAgent->new(
276             agent => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
277             timeout => 60,
278             );
279              
280             #Set proxy if user has provided one
281 0 0         $q->proxy(['http'], $self->proxy) if $self->proxy;
282              
283 0 0         return unless $self->symbol;
284 0           my $sym = uc $self->symbol;
285              
286 0           my $response = $q->get("http://finance.yahoo.com/q/op?s=$sym");
287             #Copy the LWP status to this instance
288 0   0       $self->{success} = $response && $response->is_success;
289 0 0         return unless $self->{success};
290 0           $self->{response} = $response;
291 0           $self->{status} = $response->code;
292 0           my $content = $response->content;
293 0 0         return if $content =~ /there are no all markets results for/i; # MDER.PK
294              
295              
296 0           my $tnum;
297 0           my $st = HTML::TokeParser->new(\$content);
298 0           my $ret;
299             my $text;
300 0           local ($_,$1,$2,$3,$4,$5); #Localizing special variables is recommended under mod_perl
301              
302             #
303             # First look at the DIV tags to find 'View By Expiration'. Parse out
304             # the list of expiration months. Create @optmonths containing expiration
305             # months. Main loop will pop these off one by one, retrieve that page
306             # and add the data to the data object.
307             #
308              
309 0           my %month2num = qw(jan 01 feb 02 mar 03 apr 04 may 05 jun 06
310             jul 07 aug 08 sep 09 oct 10 nov 11 dec 12);
311 0           my %lmonth2num = qw(january 01 february 02 march 03 april 04 may 05 june 06
312             july 07 august 08 september 09 october 10 november 11 december 12);
313 0           my @optmonths = ('start');
314             #Hash to translate Yahoo's column headers to our standard hash keys
315 0           my %xheaders = (
316             strike => 'strike',
317             symbol => 'symbol',
318             bid => 'bid',
319             ask => 'ask',
320             last => 'last',
321             vol => 'volume',
322             open_int => 'open',
323             chg => 'change'
324             );
325              
326 0           my $expdate = '';
327             # @{$calldata} and @{$putdata} are arrays of hashes
328 0           my $calldata = [];
329 0           my $putdata = [];
330              
331 0   0       MAIN: while (@optmonths and $expirations) { # keeps looping if $expirations was -1
332 0 0         if ($optmonths[0] eq 'start') {
333             #First time here, we're on the main query page. Extract expirations
334             #months and populate @optmonths
335 0           $expirations--;
336 0           GETEXP: while ($st->get_tag('div')) {
337 0           $text=$st->get_trimmed_text('/div');
338 0 0         if ($text =~ /view by expiration/i) {
339             #Get expiration months
340 0           my ($exp) = $text =~ /view by expiration(.*)call options/i;
341 0           @optmonths = split(/\|/,$exp);
342             #Convert 'Jan 01' format to 'YYYY-MM'
343             #Yahoo uses *both* short 'Jan' and long 'January'
344 0           for (@optmonths) {
345 0 0         last unless /(\w{3,9})\s+(\d{2,4})/;
346 0 0         if (length($1) == 3) {
347             #short month name or May
348 0 0         $_ = ($2 < 100 ? 2000+$2 : $2) . '-' . $month2num{lc $1};
349             } else {
350             #long month name
351 0 0         $_ = ($2 < 100 ? 2000+$2 : $2) . '-' . $lmonth2num{lc $1};
352             }
353             };
354 0           shift @optmonths; #The first month is the page we're already at
355 0           last GETEXP;
356             }
357             }
358             } else {
359             #@optmonths has been populated, shift off the next month
360             #and retrieve that page. When @optmonths is empty, we're done.
361             #Additional months are at http://finance.yahoo.com/q/op?s=DIA&m=2007-06
362 0           my $month = shift @optmonths;
363 0           $expirations--;
364 0           $response = $q->get("http://finance.yahoo.com/q/op?s=$sym&m=$month");
365 0           $expdate = '';
366             #Copy the LWP status to this instance
367 0           $self->{success} = $response->is_success;
368 0           $self->{status} = $response->code;
369 0           $self->{response} = $response;
370 0 0         next MAIN unless $self->{success};
371             }
372              
373             # There's something like 25 or 26 tables present. We're only looking for
374             # four of them: the Calls header and data tables and the Puts header
375             # and data tables.
376             #
377             # We'll use HTML::TokeParser's ability to go from
378             # even though the rows might be in different tables.
379             # This requires a specific order of tables: calls header then
380             # calls data then puts header then puts data.
381             #
382             # Look at the first TD cell in a table to determine if it's one we want:
383             # 'Call Options' is the header table for calls and
384             # 'Put Options' is the header table for puts. The *next* table after the
385             # header table that starts with 'Strike' is the data table for that
386             # category. Use $mode to tell which table we're currently looking for.
387              
388             #Reset the TokeParser object so we can scan by tables
389 0           $content = $response->content;
390 0           $st = HTML::TokeParser->new(\$content);
391 0           my ($tag,$newrow,$colcnt) = ('',0,0);
392 0           my @callheaders = ();
393 0           my @putheaders = ();
394 0           $calldata = [];
395 0           $putdata = [];
396              
397 0           $st->get_tag('table'); #Jump to first table
398 0           my $mode='start';
399 0           ROW: while ($tag=$st->get_tag('tr','/table','/html')) {
400             #TokeParser returns arrayref if found, undef if no more tags
401 0           $tag = $tag->[0];
402 0 0 0       last MAIN if $tag =~ /\/html/i or not $tag;
403             #Finished when getting put data but found end of table
404             #Some options pages (e.g. CENTA) have no Puts, just the header line
405 0 0 0       last ROW if $tag =~ /\/table/i and $mode =~ /gputdata/;
406             #First loop: Getting Rows
407 0           $newrow=1;
408 0           CELL: while ($tag=$st->get_tag('th', 'td','/tr','/html')) {
409             #Second loop: getting table cells
410             #As of 2010-08-31 Yahoo using tags
411 0   0       my $in_the_money = 0+ (ref $tag->[1] &&
412             exists $tag->[1]->{class} && $tag->[1]->{class} eq 'yfnc_h');
413 0           $tag = $tag->[0];
414              
415 0 0         last MAIN if $tag =~ /\/html/i; #No data returned
416 0 0         last CELL if $tag =~ /\/tr/i; #last cell in row
417 0           $text=$st->get_trimmed_text('/th', '/td');
418              
419             #Perform cleanup & set mode between new rows
420 0 0         if ($newrow) {
421 0 0 0       if ($mode =~ /start|gcalldata/ and
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
      0        
      0        
422             $text =~ /call options|put options/i) {
423             #Found Header Table
424 0 0         $mode='gcalldate' if $text =~ /call options/i;
425 0 0         $mode='gputdate' if $text =~ /put options/i;
426 0           $newrow=0;
427 0           next CELL;
428             } elsif ($mode eq 'gcalldate') {
429             #Got the expiration date in the call header
430 0           $mode = 'gcallheaders';
431 0           next ROW;
432             } elsif ($mode eq 'gputdate') {
433             #Got the expiration date in the put header
434 0           $mode = 'gputheaders';
435 0           next ROW;
436             } elsif (($mode eq 'gcallheaders' and not @callheaders) or
437             ($mode eq 'gputheaders' and not @putheaders)) {
438             #Haven't found column headers yet
439 0 0         next ROW unless $text =~ /strike/i;
440             } elsif ($mode eq 'gcalldata' or
441             ($mode eq 'gcallheaders' and @callheaders)) {
442             #Have column headers
443 0 0         next ROW unless $text; #Nothing in first cell
444             #Add a new row to @{$calldata}
445 0           push @{$calldata}, {in_the_money => $in_the_money};
  0            
446              
447 0           $mode='gcalldata';
448 0           $colcnt=0;
449             } elsif ($mode eq 'gputdata' or
450             ($mode eq 'gputheaders' and @putheaders)) {
451             #Have column headers
452             #Add a new row to @{$putdata}
453 0           push @{$putdata}, {in_the_money => $in_the_money};
  0            
454 0           $mode = 'gputdata';
455 0           $colcnt=0;
456             } else {
457             #Nothing we want in this row
458 0           next ROW;
459             }
460             }
461 0           $newrow = 0;
462              
463             #Extract the data
464 0 0         if ($mode =~ /gcalldate|gputdate/) {
    0          
    0          
465 0 0 0       if ($text and not $expdate) {
466             #Extract expiration date, convert to YYYYMMDD
467             #Text Looks like Expire at close Friday, September 17, 2010
468             #$text =~ /(\w{3})\s+(\d{1,2}),\s+(\d{4})/;
469 0           $text =~ /(\w{3,9})\s+(\d{1,2}),\s+(\d{4})/;
470 0 0         if (length($1) == 3) {
471             #short month name or May
472 0           $expdate = $3 . $month2num{lc $1} . $2;
473             } else {
474             #long month name
475 0           $expdate = $3 . $lmonth2num{lc $1} . $2;
476             }
477             }
478 0 0         $mode = 'gcallheaders' if $mode eq 'gcalldate';
479 0 0         $mode = 'gputheaders' if $mode eq 'gputdate';
480             } elsif ($mode =~ /gcallheaders|gputheaders/) {
481             #Extract table headers
482             #Use %xheaders to translate to our standard headers
483 0           $text =~ s/ /_/g; #Spaces to underscores
484 0 0         push @callheaders, $xheaders{lc($text)}
485             if $mode eq 'gcallheaders';
486 0 0         push @putheaders, $xheaders{lc($text)}
487             if $mode eq 'gputheaders';
488             } elsif ($mode =~ /gcalldata|gputdata/) {
489             #cleanup $text
490 0           $text =~ s/,//g; #Remove commas
491 0 0         if ($text =~ /(up|down)\s+(\d*.?\d*)/i) {
492             #This is the Chg column
493             #Convert 'Up/Down' to + or -
494 0           $text = $2;
495 0 0         $text*=-1 if $1=~/down/i;
496             }
497             #Insert the data
498             #Remove the '.X' Yahoo appends to symbol
499 0 0         if ($mode eq 'gcalldata') {
500 0           $calldata->[-1]->{$callheaders[$colcnt]} = $text;
501             # $calldata->[-1]->{symbol} =~ s/\.X$//i
502             # if $callheaders[$colcnt] eq 'symbol';
503             } else {
504 0           $putdata->[-1]->{$putheaders[$colcnt]} = $text;
505             # $putdata->[-1]->{symbol} =~ s/\.X$//i
506             # if $putheaders[$colcnt] eq 'symbol';
507             }
508 0           $colcnt++;
509             }
510             } #Getting TD
511             } #Getting TR
512              
513             #Sort calls and puts by strike price
514 0 0         if (not exists $calldata->[0]->{symbol}) {
515             # if the option has no calls, empty the array
516 0           $calldata = [];
517             }
518 0 0         if (not exists $putdata->[0]->{symbol}) {
519             # if the option has no puts (e.g. CENTA as of 2009-Feb-15), empty the array
520 0           $putdata = [];
521             }
522 0           @{$calldata} = sort { $a->{strike} <=> $b->{strike} } @{$calldata};
  0            
  0            
  0            
523 0           @{$putdata} = sort { $a->{strike} <=> $b->{strike} } @{$putdata};
  0            
  0            
  0            
524              
525             #If this expiration already exists in $self->{data}, append
526             #new data and resort, otherwise create new expiration
527 0           CHECKDUP: {
528 0           foreach (@{$self->{data}}) {
  0            
529 0 0         if ($_->{exp} == $expdate) {
530             #Duplicate present
531 0           @{$_->{calls}} = sort { $a->{strike} <=> $b->{strike} }
  0            
  0            
532 0           (@{$_->{calls}}, @{$calldata});
  0            
533 0           @{$_->{puts}} = sort { $a->{strike} <=> $b->{strike} }
  0            
  0            
534 0           (@{$_->{puts}}, @{$putdata});
  0            
535 0           last CHECKDUP; #Don't add new expiration
536             } #Duplicate expiration already present
537             }
538             #Add new expiration
539             #Only executed if no duplicates expirations present
540 0           push @{$self->{data}}, {
  0            
541             exp => $expdate,
542             calls => $calldata,
543             puts => $putdata
544             };
545             }
546              
547             #Sort data by expirations
548 0           @{$self->{data}} = sort { $a->{exp} <=> $b->{exp} } @{$self->{data}};
  0            
  0            
  0            
549              
550             } #End MAIN loop
551              
552             } #End getyahoodata
553              
554             #-------------------------------------------------------------------
555             sub getcboedata {
556 0     0 0   require WWW::Mechanize;
557             #
558             # Main query page:
559             # http://www.cboe.com/DelayedQuote/QuoteTable.aspx
560             #
561             # Get expirations from
562             # http://www.cboe.com/DelayedQuote/SimpleQuote.aspx?ticker=BQQ+OH-E
563             #
564             # Unlike Yahoo, the main query page has *all* the options available.
565             # Alas, it is lacking the expiration dates for those options.
566             # We'll drill down into the individual option page to get the date.
567             #
568             # Right now we only do this once for each 'YY MMM' format date found
569             # in the option description on the first page. We *assume* that all
570             # subsequent dates of the same format have the *same* full date.
571             #
572 0           my $self = shift;
573 0           my $q = WWW::Mechanize->new(autocheck => 0);
574 0           $q->agent_alias('Linux Mozilla');
575 0           $q->quiet(1);
576 0           $q->timeout(60);
577             #Set proxy if user has provided one
578 0 0         $q->proxy(['http', 'ftp'], $self->proxy) if $self->proxy;
579              
580 0 0         return unless $self->symbol;
581 0           my $sym = uc $self->symbol;
582              
583             #Hash to translate CBOE column headers to our standard hash keys
584 0           my %xheaders = (
585             bid => 'bid',
586             ask => 'ask',
587             last_sale => 'last',
588             vol => 'volume',
589             open_int => 'open',
590             net => 'change'
591             );
592              
593 0           $q->get("http://www.cboe.com/DelayedQuote/QuoteTable.aspx");
594 0 0         return unless $q->success;
595 0           $q->submit_form(
596             fields => { 'ucQuoteTableCtl:txtSymbol' => $sym,
597             'ucQuoteTableCtl:ALL' => 2 },
598             button => 'ucQuoteTableCtl:btnSubmit'
599             );
600             #Copy the WWW::Mechanize status to this instance
601 0           $self->{success} = $q->success;
602 0           $self->{status} = $q->status;
603 0           $self->{response} = $q->response;
604              
605 0 0         return unless $q->success;
606              
607             # Output from mech-dump to get labels above:
608             # ucQuoteTableCtl:txtSymbol= (text)
609             # ucQuoteTableCtl:chkAllExchange= (checkbox)
610             # [*/off|on/All exchange option quotes (if multiply listed)]
611             # ucQuoteTableCtl:ALL=0 (radio)
612             # [*0/List near term at-the-money options & Weeklys if avail.|
613             # 2/List all options, LEAPS & Weeklys if avail. (Single page)]
614             # ucQuoteTableCtl:btnSubmit=Submit (submit)
615              
616 0           my $tnum;
617 0           my $st = HTML::TokeParser->new(\$q->{content});
618 0           my $ret;
619 0           my ($tag,$text,$colcnt) = ('','',0);
620 0           local ($_,$1,$2,$3,$4,$5); #Localizing special variables is recommended under mod_perl
621              
622 0           my @optmonths = ();
623 0           my %months2num = qw(jan 01 feb 02 mar 03 apr 04 may 05 jun 06
624             jul 07 aug 08 sep 09 oct 10 nov 11 dec 12);
625 0           my @callheaders = ();
626 0           my @putheaders = ();
627 0           my $putscol = 0; #Column where puts data starts
628              
629 0           $st->get_tag('table'); #Jump to first table
630             #Find start of data:
631 0           HEADER: while ($st->get_tag('tr')) {
632 0           $st->get_tag('td');
633 0 0         if ($st->get_trimmed_text('/td') =~ /calls/i) {
634             #Parse out the column headers
635 0           my $mode='calls';
636 0           while (my $tag=$st->get_tag('td','/tr')) {
637             #get_tag returns undef when no more tags
638 0           $tag=@{$tag}[0];
  0            
639 0 0         last HEADER if $tag =~ /\/tr/i;
640 0           my $text = $st->get_trimmed_text('/td');
641 0           $text =~ s/ /_/g; #spaces to underscores
642 0 0         if ($text =~ /puts/i) {
643 0           $mode = 'puts';
644 0           next;
645             }
646 0 0         if ($mode eq 'calls') {
647 0           push @callheaders,$xheaders{lc $text};
648             } else {
649 0           push @putheaders,$xheaders{lc $text};
650             }
651             }
652 0           last HEADER;
653             }
654             }
655              
656             #Unlike Yahoo, the main page does not have the actual
657             #expiration date on it, just the YYMMM version. We are
658             #going to *assume* that all YYMMM expirations are the
659             #*same* actual date. The first time we hit a YYMMM date,
660             #drill down into the details for that option to extract
661             #the actual date and then use it for all subsequent
662             #YYMMM options.
663             #So, there might be a problem if there are weeklys,
664             #monthlies or quarterlies present...
665             #http://www.cboe.com/micro/weeklys/introduction.aspx
666 0           my %expirations = ();
667 0           my %tempdata = ();
668              
669 1     1   9 no warnings;
  1         2  
  1         713  
670 0           ROW: while ($tag=$st->get_tag('tr','/table')) {
671             #get_tag returns undef when no more tags
672 0           $tag=@{$tag}[0];
  0            
673 0 0         last ROW if $tag =~ /\/table/;
674 0           my $mode = 'start';
675 0           my @tmpheaders = @callheaders;
676 0           my $call = {};
677 0           my $put = {};
678 0           my $exp = '';
679 0           CELL: while ($tag=$st->get_tag('td','/tr')) {
680 0           $tag=@{$tag}[0];
  0            
681 0 0         last CELL if $tag =~ /\/tr/i;
682 0           $text=$st->get_trimmed_text('/td');
683 0 0         next ROW if $text =~ /\[img\]/i; #There's an IMG after the column headers
684              
685             #Description looks like "07 May 57.00 (IWT EE-E)"
686 0 0 0       if ($mode eq 'start' and
    0 0        
    0          
    0          
687             $text =~ /(\d{2} \w{3}) (\d{1,5}\.\d{2}) \((\w{1,4}) (\w{2})-(\w)\)/) {
688             #Found call description
689 0           $exp = $1;
690 0           $call->{strike} = $2;
691 0           $call->{symbol} = "$3$4";
692 0           my $linksym = "$3+$4";
693 0           my $type = $5;
694 0           $exp =~ s/ //g; #Back-referencing variables reset on any regex
695              
696             #Check if expiration date has already been found, if not
697             #drill down to option detail page to get it
698 0 0         unless ($expirations{$exp}) {
699 0           my $det = WWW::Mechanize->new();
700 0           $det->agent_alias('Linux Mozilla');
701 0           $det->quiet(1);
702 0           $det->get("http://www.cboe.com/DelayedQuote/SimpleQuote.aspx?ticker=$linksym-$type");
703             #Copy the WWW::Mechanize status to this instance
704 0           $self->{success} = $det->success;
705 0           $self->{status} = $det->status;
706 0           $self->{response} = $det->response;
707 0           my $dat = HTML::TokeParser->new(\$det->{content});
708 0 0         unless ($self->{success}) {
709             #Detail lookup failed. IP address probably blacklisted
710             #Manually calc 3rd Friday of month
711 0           my ($tyear,$tmon) = $exp =~ /(\d{2})(\w{3})/;
712 0           $tyear += 2000;
713 0           $tmon = lc $tmon;
714 0           my %mon2digit = qw/jan 01 feb 02 mar 03 apr 04 may 05
715             jun 06 jul 07 aug 08 sep 09 oct 10 nov 11 dec 12/;
716 1     1   1392 use Date::Calc;
  1         68640  
  1         939  
717             #DOW is 5 for Friday, 3rd occurance
718 0           my ($year,$month,$day) =
719             Date::Calc::Nth_Weekday_of_Month_Year($tyear,$mon2digit{$tmon},5,3);
720             #Pad zeros to month and day
721 0           $month = substr(100+$month,-2);
722 0           $day = substr(100+$day,-2);
723 0           $expirations{$exp} = "$year$month$day";
724             } else {
725             #Extract date from option detail page
726 0           DATETABLE: while (my $tag=$dat->get_tag('table', '/table')) {
727 0           $tag=@{$tag}[0];
  0            
728 0 0         next DATETABLE if $tag =~ /\/table/i;
729 0           my $text=$dat->get_trimmed_text('/table');
730 0 0         if ($text =~ /expiration date\s*(\d{2})\/(\d{2})\/(\d{4})/i) {
731 0           $expirations{$exp} = "$3$1$2";
732 0           last DATETABLE;
733             }
734             }
735             }
736             }
737              
738 0           $mode = 'call';
739             } elsif ($mode eq 'call' and
740             $text =~ /(\d{2} \w{3}) (\d{1,5}\.\d{2}) \((\w{1,4}) (\w{2})-\w\)/) {
741             #Found put description
742 0 0         $exp = $1 unless $exp; #Should have found it with call
743 0           $put->{strike} = $2;
744 0           $put->{symbol} = "$3$4";
745 0           $exp =~ s/ //g; #Back-referencing variables reset on any regex
746              
747 0           $mode = 'put';
748 0           @tmpheaders = @putheaders;
749             } elsif ($mode eq 'call') {
750 0           $call->{shift @tmpheaders} = $text;
751             } elsif ($mode eq 'put') {
752 0           $put->{shift @tmpheaders} = $text;
753             } else {
754             #This should never happen
755             #print "ERROR parsing CBOE data!!!!\nText: $text\n";
756             }
757             } #Get TD
758              
759             #Move put and call to proper location using $exp
760 0 0         unless ($tempdata{$exp}->{exp}) {
761             #Create new expiration in %tempdata
762 0           $tempdata{$exp}->{exp} = $expirations{$exp};
763 0           $tempdata{$exp}->{calls} = [];
764 0           $tempdata{$exp}->{puts} = [];
765             }
766             #Move hashrefs into %tempdata
767 0           push @{$tempdata{$exp}->{calls}},$call;
  0            
768 0           push @{$tempdata{$exp}->{puts}},$put;
  0            
769              
770             } #Get TR
771              
772             #Sort %tempdata by expiration dates and move into @{$self->{data}}
773 0           push @{$self->{data}}, $tempdata{$_} for
  0            
  0            
774             sort { $tempdata{$a}->{exp} <=> $tempdata{$b}->{exp} }
775             keys %tempdata;
776              
777             #Sort puts and calls at each expiration by strike price
778 0           for (@{$self->{data}}) {
  0            
779 0           @{$_->{calls}} =
  0            
780 0           sort { $a->{strike} <=> $b->{strike} } @{$_->{calls}};
  0            
781 0           @{$_->{puts}} =
  0            
782 0           sort { $a->{strike} <=> $b->{strike} } @{$_->{puts}};
  0            
783             }
784              
785             } #End getcboedata
786              
787             1;
788             __END__