File Coverage

blib/lib/App/Chart/Yahoo.pm
Criterion Covered Total %
statement 33 35 94.2
branch n/a
condition n/a
subroutine 12 12 100.0
pod n/a
total 45 47 95.7


line stmt bran cond sub pod time code
1             # Copyright 2007, 2008, 2009, 2010, 2011, 2015, 2016, 2017 Kevin Ryde
2              
3             # This file is part of Chart.
4             #
5             # Chart is free software; you can redistribute it and/or modify it under the
6             # terms of the GNU General Public License as published by the Free Software
7             # Foundation; either version 3, or (at your option) any later version.
8             #
9             # Chart is distributed in the hope that it will be useful, but WITHOUT ANY
10             # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11             # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12             # details.
13             #
14             # You should have received a copy of the GNU General Public License along
15             # with Chart. If not, see <http://www.gnu.org/licenses/>.
16              
17              
18             package App::Chart::Yahoo;
19 1     1   78913 use 5.010;
  1         3  
20 1     1   4 use strict;
  1         1  
  1         18  
21 1     1   4 use warnings;
  1         1  
  1         24  
22 1     1   4 use Carp;
  1         1  
  1         46  
23 1     1   224 use Date::Calc;
  1         6091  
  1         34  
24 1     1   253 use Date::Parse;
  1         5628  
  1         106  
25 1     1   6 use List::Util qw (min max);
  1         2  
  1         73  
26 1     1   289 use POSIX ();
  1         4574  
  1         24  
27 1     1   222 use URI::Escape;
  1         1319  
  1         79  
28 1     1   361 use Locale::TextDomain ('App-Chart');
  1         19278  
  1         7  
29              
30 1     1   6108 use Tie::TZ;
  1         464  
  1         32  
31 1     1   305 use App::Chart;
  0            
  0            
32             use App::Chart::Database;
33             use App::Chart::Download;
34             use App::Chart::DownloadHandler;
35             use App::Chart::DownloadHandler::IndivChunks;
36             use App::Chart::IntradayHandler;
37             use App::Chart::Latest;
38             use App::Chart::Sympred;
39             use App::Chart::TZ;
40             use App::Chart::Weblink;
41              
42             # uncomment this to run the ### lines
43             # use Smart::Comments;
44              
45             use constant DEBUG => 0;
46              
47              
48             # .X or .XY or no suffix
49             our $yahoo_pred = App::Chart::Sympred::Proc->new
50             (sub {
51             my ($symbol) = @_;
52             return ($symbol !~ /\.(FQ|LJ)$/
53             && $symbol =~ /[.=]..?$|^[^.]+$/);
54             });
55              
56             my $download_pred = App::Chart::Sympred::Any->new ($yahoo_pred);
57             our $latest_pred = App::Chart::Sympred::Any->new ($yahoo_pred);
58             our $index_pred = App::Chart::Sympred::Regexp->new (qr/^\^|^0.*\.SS$/);
59             my $futures_pred = App::Chart::Sympred::Any->new;
60              
61             # max symbols to any /q? quotes request
62             # Finance::Quote::Yahoo uses a limit of 40 to stop the url getting too
63             # long, which apparently some servers or proxies can't handle
64             use constant MAX_QUOTES => 40;
65              
66             # overridden by specific nodes
67             App::Chart::setup_source_help
68             ($yahoo_pred, __p('manual-node','Yahoo Finance'));
69              
70              
71             #-----------------------------------------------------------------------------
72             # web link - basic quote page
73             #
74             # Eg. http://finance.yahoo.com/q?s=BHP.AX
75             #
76             # The accelerator is "_Y" so as not to clash with "_S" for stock on various
77             # stock exchange links like "ASX IRM _Stock Information"
78              
79             App::Chart::Weblink->new
80             (pred => $yahoo_pred,
81             name => __('_Yahoo Stock Page'),
82             desc => __('Open web browser at the Yahoo quote page for this stock'),
83             proc => sub {
84             my ($symbol) = @_;
85             return "http://"
86             . App::Chart::Database->preference_get ('yahoo-quote-host',
87             'finance.yahoo.com')
88             . "/q?s="
89             . URI::Escape::uri_escape($symbol);
90             });
91              
92              
93             #-----------------------------------------------------------------------------
94             # misc
95              
96              
97             # (if (and (yahoo-futures-symbol? symbol)
98             # (not (chart-symbol-mdate symbol)))
99             # (let* ((want-tdate (adate->tdate
100             # (first
101             # (yahoo-quote-adate-time symbol ""))))
102             # (mdate (or (latest-symbol-mdate-nodownload symbol
103             # want-tdate)
104             # (begin
105             # (weblink-message
106             # (_ "Finding front month ..."))
107             # (latest-symbol-mdate symbol want-tdate)))))
108             # (if mdate # might still be unknown
109             # (set! symbol
110             # (string-append (chart-symbol-commodity symbol)
111             # (mdate->MYY-str mdate)
112             # (chart-symbol-suffix symbol))))))
113              
114              
115             #-----------------------------------------------------------------------------
116             # Exchanges page for quote delays
117             #
118             # This looks at the exchanges page
119             #
120             use constant EXCHANGES_URL => 'https://help.yahoo.com/kb/SLN2310.html';
121              
122             # Exchanges page was previously http://finance.yahoo.com/exchanges, but why
123             # would they keep it the same when breaking everybody's links would be
124             # better.
125              
126             # refetch the exchanges page after this many days
127             use constant EXCHANGES_UPDATE_DAYS => 7;
128              
129             # containing arefs [$pred,'.XX']
130             my @quote_delay_aliases;
131              
132             sub setup_quote_delay_alias {
133             my ($pred, $suffix) = @_;
134             push @quote_delay_aliases, [ $pred, $suffix ];
135             }
136              
137             sub symbol_quote_delay {
138             my ($symbol) = @_;
139              
140             # indexes all in real time
141             if ($index_pred->match($symbol)) {
142             return 0;
143             }
144              
145             my $suffix = App::Chart::symbol_suffix ($symbol);
146             my $h = exchanges_data();
147             my $delay = $h->{$suffix};
148              
149             if (! defined $delay) {
150             if (my $elem = List::Util::first { $_->[0]->match ($symbol) }
151             @quote_delay_aliases) {
152             $suffix = $elem->[1];
153             $delay = $h->{$suffix};
154             }
155             }
156             if (! defined $delay) {
157             # guess default 20 minutes
158             $delay = 20;
159             }
160             return $delay;
161             }
162              
163             # return a hashref of exchange delay data like { '.AX' => 20, '.BI' => 15 }
164             sub exchanges_data {
165             require App::Chart::Pagebits;
166             return App::Chart::Pagebits::get
167             (name => __('Yahoo exchanges page'),
168             url => EXCHANGES_URL,
169             key => 'yahoo-quote-delays',
170             freq_days => EXCHANGES_UPDATE_DAYS,
171             parse => \&exchanges_parse);
172             }
173             sub exchanges_parse {
174             my ($content) = @_;
175             my $h = {};
176              
177             require HTML::TableExtract;
178             my $te = HTML::TableExtract->new (headers => ['Suffix', 'Delay']);
179             $te->parse($content);
180             if (! $te->tables) {
181             warn "Yahoo exchanges page unrecognised, assuming 15 min quote delay";
182             return $h;
183             }
184              
185             foreach my $row ($te->rows) {
186             my $suffix = $row->[0];
187             my $delay = $row->[1];
188             next if ($suffix eq 'N/A');
189              
190             # eg "15 min"
191             # or "15 min**" with footnote
192             #
193             if ($delay =~ /^(\d+) min/) {
194             $delay = $1;
195             } elsif ($delay =~ /real/i) {
196             $delay = 0;
197             } else {
198             warn "Yahoo exchanges page unrecognised delay: \"$delay\"\n";
199             next;
200             }
201              
202             $h->{$suffix} = $delay + 0;
203             }
204             return $h;
205             }
206              
207              
208             #------------------------------------------------------------------------------
209             # Quotes bits generally.
210             #
211             # This uses the csv format quotes like
212             #
213             # http://download.finance.yahoo.com/d/quotes.csv?f=snl&e=.csv&s=BHP.AX
214             #
215             # The "f" field keys can be found at the following (open an account to get
216             # to them).
217             #
218             # http://edit.my.yahoo.com/config/edit_pfview?.vk=v1
219             # http://edit.finance.yahoo.com/e9?.intl=au
220             #
221             # http://download.finance.yahoo.com/d?
222             # s= # symbol
223             # f= # format, concat of the following
224             # s # symbol
225             # n # company name
226             # l1 # last price
227             # d1 # last trade date (in home exchange's timezone)
228             # t1 # last trade time (in yahoo server timezone)
229             # c1 # change
230             # p2 # percent change
231             # v # volume
232             # a2 # average daily volume
233             # b # bid
234             # b6 # bid size
235             # a # ask
236             # a5 # ask size
237             # k1 # "time - last" (ECN), with <b> and <i> markup
238             # c6 # change (ECN)
239             # m2 # day's range (ECN)
240             # b3 # bid (ECN)
241             # b2 # ask (ECN)
242             # p # previous close
243             # o # today's open
244             # m # day's range, eg. "1.23 - 4.56"
245             # w # 52-week range, eg. "1.23 - 4.56"
246             # e # earnings per share
247             # r # p/e ratio
248             # d # div per share
249             # q # ex div date, eg. "Mar 31" or "N/A"
250             # r1 # div pay date
251             # y # div yield
252             # j1 # market cap
253             # x # stock exchange
254             # c4 # currency, eg. "AUD"
255             # i # more info links, letters
256             # # c=chart, n=news, p=profile, r=research, i=insider,
257             # # m=message board (yahoo)
258             # k # 52-week high
259             #
260             # Don't know what the distinction between b,a and b3,b2 quotes are actually
261             # meant to be.
262             # - For the Australian Stock Exchange, b,a are "N/A", and b3,b2 is the
263             # SEATS best quote.
264             # - For US stocks b,a seem to be "N/A", and b3,b2 an ECN quote. The
265             # latter has been seen a long way away from from recent trades though,
266             # eg. in BRK-A.
267             #
268             # d1,t1 are a bit odd, the time is the yahoo server's zone, but the date
269             # seems to be always GMT. The zone for the time can be seen easily by
270             # looking at a quote from the various international XX.finance.yahoo.com.
271             # For the zone for the date however you need to be watching at midnight
272             # GMT, where it ticks over (at all the international XX.finance.yahoo.com).
273              
274              
275             # quote_parse_div_date ($str) returns an iso YYYY-MM-DD date string for a
276             # dividend $str coming from quote.csv data, or undef if none. There are
277             # several different formats,
278             # "Jan 7" # finance.yahoo.com
279             # " 5 Jan" # au.finance, uk.finance
280             # "24-Sep-04" # ABB.AX on finance.yahoo.com
281             # "24 Sep, 2004" # ABB.AX on au.finance
282             # "Sep 24, 2004" # ABB.AX on ca.finance
283             #
284             # An error is thrown for an unrecognised string, don't want some new form to
285             # end up with dividends silently forgotten.
286             #
287             sub quote_parse_div_date {
288             my ($str) = @_;
289             if (DEBUG) { print "quote_parse_div_date() '$str'\n"; }
290             if (! defined $str || $str eq 'N/A' || $str eq '') {
291             return undef; # no info
292             }
293              
294             my ($ss,$mm,$hh,$day,$month,$year,$zone) = Date::Parse::strptime ($str);
295             $month++;
296             if ($year) {
297             $year += 1900;
298             if ($year < 2000) { # "04" returned as 1904, bump to 2004
299             $year += 100;
300             }
301             } else {
302             # year not given, try nearest
303             $year = App::Chart::Download::month_to_nearest_year ($month);
304             }
305             if (! Date::Calc::check_date ($year, $month, $day)) {
306             warn "Yahoo invalid dividend date '$str'";
307             }
308             return App::Chart::ymd_to_iso ($year, $month, $day);
309             }
310              
311             #------------------------------------------------------------------------------
312             # latest
313             #
314             # wget -S -O /dev/stdout 'http://download.finance.yahoo.com/d/quotes.csv?f=snc4b3b2d1t1oml1c1vqdx&e=.csv&s=GM'
315             #
316              
317             use constant DEFAULT_DOWNLOAD_HOST => 'download.finance.yahoo.com';
318              
319             App::Chart::LatestHandler->new
320             (pred => $latest_pred,
321             proc => \&latest_download,
322             max_symbols => MAX_QUOTES);
323              
324             sub latest_download {
325             my ($symbol_list) = @_;
326              
327             App::Chart::Download::status
328             (__x('Yahoo quotes {symbol_range}',
329             symbol_range =>
330             App::Chart::Download::symbol_range_string ($symbol_list)));
331              
332             my $host = App::Chart::Database->preference_get
333             ('yahoo-quote-host', DEFAULT_DOWNLOAD_HOST);
334             my $url = "http://$host/d/quotes.csv?f=snc4b3b2d1t1oml1c1vqdx&e=.csv&s="
335             . join (',', map { URI::Escape::uri_escape($_) } @$symbol_list);
336              
337             my $resp = App::Chart::Download->get ($url);
338             App::Chart::Download::write_latest_group (latest_parse ($resp));
339             }
340              
341             sub latest_parse {
342             my ($resp) = @_;
343             my $content = $resp->decoded_content (raise_error => 1);
344             ### Yahoo quotes: $content
345              
346             my @data = ();
347             my $h = { source => __PACKAGE__,
348             resp => $resp,
349             prefer_decimals => 2,
350             date_format => 'mdy', # eg. '6/26/2015'
351             data => \@data };
352              
353             require Text::CSV_XS;
354             my $csv = Text::CSV_XS->new;
355             foreach my $line (App::Chart::Download::split_lines ($content)) {
356             $csv->parse($line);
357             ### csv fields: $csv->fields()
358             my ($symbol, $name, $currency, $bid, $offer, $last_date, $last_time,
359             $open, $range, $last, $change, $volume,
360             $div_date, $div_amount, $exchange)
361             = $csv->fields();
362             if (! defined $symbol) {
363             # blank line maybe
364             print "Yahoo quotes blank line maybe:\n---\n$content\n---\n";
365             next;
366             }
367              
368             # for unknown stocks the name is a repeat of the symbol, which is pretty
369             # useless
370             if ($name eq $symbol) { $name = undef; }
371              
372             my ($low, $high) = split /-/, $range;
373             my $quote_delay_minutes = symbol_quote_delay ($symbol);
374              
375             # have seen wildly garbage date for unknown symbols, like
376             # GC.CMX","GC.CMX","MRA",N/A,N/A,"8/352/19019","4:58am",N/A,"N/A - N/A",0.00,N/A,N/A,"N/A",N/A,"N/A
377             # depending what else in the same request ...
378             #
379              
380             # In the past date/times were in New York timezone, for shares anywhere
381             # in the world. The Chart database is in the timezone of the exchange.
382             # As of June 2015 believe Yahoo is now also the exchange timezone so no
383             # transformation.
384             #
385             # my $symbol_timezone = App::Chart::TZ->for_symbol ($symbol);
386             # ($last_date, $last_time)
387             # = quote_parse_datetime ($last_date, $last_time,
388             # App::Chart::TZ->newyork,
389             # $symbol_timezone);
390              
391             # dividend is "0.00" for various unknowns or estimates, eg. from ASX
392             # trusts
393             if (App::Chart::Download::str_is_zero ($div_amount)) {
394             $div_amount = __('unknown');
395             }
396              
397             # dividend shown only if it's today
398             # don't show if no last_date, just in case have a div_date but no
399             # last_date for some reason
400             $div_date = quote_parse_div_date ($div_date);
401             if (! ($div_date && $last_date && $div_date eq $last_date)) {
402             $div_amount = undef;
403             }
404              
405             push @data, { symbol => $symbol,
406             name => $name,
407             exchange => $exchange,
408             currency => $currency,
409              
410             quote_delay_minutes => $quote_delay_minutes,
411             bid => $bid,
412             offer => $offer,
413              
414             last_date => $last_date,
415             last_time => $last_time,
416             open => $open,
417             high => $high,
418             low => $low,
419             last => $last,
420             change => $change,
421             volume => $volume,
422             dividend => $div_amount,
423             };
424             }
425              
426             ### $h
427             return $h;
428             }
429              
430             sub mktime_in_zone {
431             my ($sec, $min, $hour, $mday, $mon, $year, $zone) = @_;
432             my $timet;
433              
434             { local $Tie::TZ::TZ = $zone->tz;
435             $timet = POSIX::mktime ($sec, $min, $hour,
436             $mday, $mon, $year, 0,0,0);
437             my ($Xsec,$Xmin,$Xhour,$Xmday,$Xmon,$Xyear,$wday,$yday,$isdst)
438             = localtime ($timet);
439             return POSIX::mktime ($sec, $min, $hour,
440             $mday, $mon, $year, $wday,$yday,$isdst);
441             }
442             }
443              
444             # $date is dmy like 7/15/2007, in GMT
445             # $time is h:mp like 10:05am, in $server_zone
446             #
447             # return ($date, $time) iso strings like ('2008-06-11', '10:55:00') in
448             # $want_zone
449             #
450             sub quote_parse_datetime {
451             my ($date, $time, $server_zone, $want_zone) = @_;
452             if (DEBUG) { print "quote_parse_datetime $date, $time\n"; }
453             if ($date eq 'N/A' || $time eq 'N/A') { return (undef, undef); }
454              
455             my ($sec,$min,$hour,$mday,$mon,$year)
456             = Date::Parse::strptime($date . ' ' . $time);
457             $sec //= 0; # undef if not present
458             if (DEBUG) { print " parse $sec,$min,$hour,$mday,$mon,$year\n"; }
459              
460             my $timet = mktime_in_zone ($sec, $min, $hour,
461             $mday, $mon, $year, $server_zone);
462             if (DEBUG) {
463             print " timet Serv ",do { local $Tie::TZ::TZ = $server_zone->tz;
464             POSIX::ctime($timet) };
465             print " timet GMT ",do { local $Tie::TZ::TZ = 'GMT';
466             POSIX::ctime($timet) };
467             }
468              
469             my ($gmt_sec,$gmt_min,$gmt_hour,$gmt_mday,$gmt_mon,$gmt_year,$gmt_wday,$gmt_yday,$gmt_isdst) = gmtime ($timet);
470              
471             if ($gmt_mday != $mday) {
472             if (DEBUG) { print " mday $mday/$mon cf gmt_mday $gmt_mday/$gmt_mon, at $timet\n"; }
473             if (cmp_modulo ($gmt_mday, $mday, 31) < 0) {
474             $mday++;
475             } else {
476             $mday--;
477             }
478             $timet = mktime_in_zone ($sec, $min, $hour,
479             $mday, $mon, $year, $server_zone);
480             if (DEBUG) { print " switch to $mday giving $timet = $timet\n"; }
481             if (DEBUG) {
482             print " timet GMT ",do { local $Tie::TZ::TZ = 'GMT';
483             POSIX::ctime($timet) };
484             print " timet Targ ",do { local $Tie::TZ::TZ = $want_zone->tz;
485             POSIX::ctime($timet) };
486             }
487             }
488             return $want_zone->iso_date_time ($timet);
489             }
490              
491             sub cmp_modulo {
492             my ($x, $y, $modulus) = @_;
493             my $half = int ($modulus / 2);
494             return (($x - $y + $half) % $modulus) <=> $half;
495             }
496              
497             sub decode_hms {
498             my ($str) = @_;
499             my ($hour, $minute, $second) = split /:/, $str;
500             if (! defined $second) { $second = 0; }
501             return ($hour, $minute, $second);
502             }
503              
504              
505             #-----------------------------------------------------------------------------
506             # download
507             #
508             # This uses the historical prices page like
509             #
510             # https://finance.yahoo.com/quote/AMP.AX/history?p=AMP.AX
511             #
512             # which puts a cookie like
513             #
514             # Set-Cookie: B=fab5sl9cqn2rd&b=3&s=i3; expires=Sun, 03-Sep-2018 04:56:13 GMT; path=/; domain=.yahoo.com
515             #
516             # and contains buried within a mountain of hideous script
517             #
518             # "CrumbStore":{"crumb":"hdDX\u002FHGsZ0Q"}
519             #
520             # The \u002F is backslash character etc which is script string for "/"
521             # character. The crumb is included in a CSV download query like
522             #
523             # https://query1.finance.yahoo.com/v7/finance/download/AMP.AX?period1=1503810440&period2=1504415240&interval=1d&events=history&crumb=hdDX/HGsZ0Q
524             #
525             # period1 is the start time, period2 the end time, both as Unix seconds
526             # since 1 Jan 1970. Not sure of the timezone needed. Some experiments
527             # suggest it depends on the timezone of the symbol. http works as well as
528             # https. The result is like
529             #
530             # Date,Open,High,Low,Close,Adj Close,Volume
531             # 2017-09-07,30.299999,30.379999,30.000000,30.170000,30.170000,3451099
532             #
533             # The "9999s" are some dodgy rounding off to what should be usually at most
534             # 3 decimal places.
535             #
536             # Response is 404 if no such symbol, 401 unauthorized if no cookie or crumb.
537             #
538             # "events=div" gives dividends like
539             #
540             # Date,Dividends
541             # 2017-08-11,0.161556
542             #
543             # "events=div" gives splits like, for a consolidation (GXY.AX)
544             #
545             # Date,Stock Splits
546             # 2017-05-22,1/5
547             #
548             #----------------
549             # For reference, there's a similar further which is json format (%7C = "|")
550             #
551             # https://query2.finance.yahoo.com/v8/finance/chart/IBM?formatted=true&lang=en-US&region=US&period1=1504028419&period2=1504428419&interval=1d&events=div%7Csplit&corsDomain=finance.yahoo.com
552             #
553             # This doesn't require a cookie and crumb, has some info like symbol
554             # timezone. The numbers look like they're rounded through 32-bit floating
555             # point, for example "142.55999755859375" which is 142.55 in a 23-bit
556             # mantissa. log(14255000)/log(2) = 23.76 bits
557             #
558             # All prices look like they are split-adjusted, which is ok if that's what
559             # you ant and are downloading a full data set, but bad for incremental since
560             # you don't know when a change is applied.
561             #
562              
563             App::Chart::DownloadHandler->new
564             (name => __('Yahoo'),
565             pred => $download_pred,
566             available_tdate_by_symbol => \&daily_available_tdate,
567             available_tdate_extra => 2,
568             url_and_cookiejar_func => \&daily_url_and_cookiejar,
569             proc => \&daily_download,
570             chunk_size => 150);
571              
572             sub daily_available_tdate {
573             my ($symbol) = @_;
574              
575             # Sep 2017: daily data is present for the current day's trade, during the
576             # trading session. Try reckoning it complete at 6pm.
577             return App::Chart::Download::tdate_today_after
578             (18,0, App::Chart::TZ->for_symbol ($symbol));
579              
580             # return App::Chart::Download::tdate_today_after
581             # (10,30, App::Chart::TZ->for_symbol ($symbol))
582             # - 1;
583             }
584              
585             sub daily_download {
586             my ($symbol_list) = @_;
587             App::Chart::Download::status (__('Yahoo daily data'));
588              
589             # App::Chart::Download::verbose_message ("Yahoo crumb $crumb cookies\n"
590             # . $jar->as_string);
591              
592             my $crumb_errors = 0;
593             SYMBOL: foreach my $symbol (@$symbol_list) {
594             my $lo_tdate = App::Chart::Download::start_tdate_for_update (@$symbol_list);
595             my $hi_tdate = daily_available_tdate ($symbol);
596              
597             App::Chart::Download::status
598             (__('Yahoo data'), $symbol,
599             App::Chart::Download::tdate_range_string ($lo_tdate, $hi_tdate));
600              
601             my $lo_timet = tdate_to_unix($lo_tdate - 2);
602             my $hi_timet = tdate_to_unix($hi_tdate + 2);
603              
604             my $data = daily_cookie_data($symbol);
605             if (! defined $data) {
606             print "Yahoo $symbol does not exist";
607             next SYMBOL;
608             }
609             my $crumb = URI::Escape::uri_escape($data->{'crumb'});
610             my $jar = http_cookies_from_string($data->{'cookies'} // '');
611              
612             my $h = { source => __PACKAGE__,
613             prefer_decimals => 2,
614             date_format => 'ymd',
615             };
616             foreach my $elem (['history',\&daily_parse],
617             ['div', \&daily_parse_div],
618             ['split', \&daily_parse_split]) {
619             my ($events,$parse) = @$elem;
620             my $url = "http://query1.finance.yahoo.com/v7/finance/download/"
621             . URI::Escape::uri_escape($symbol)
622             . "?period1=$lo_timet&period2=$hi_timet&interval=1d&events=$events&crumb=$crumb";
623              
624             my $resp = App::Chart::Download->get ($url,
625             allow_401 => 1,
626             allow_404 => 1,
627             cookie_jar => $jar,
628             );
629             if ($resp->code == 401) {
630             if (++$crumb_errors >= 2) { die "Yahoo: crumb authorization failed"; }
631             App::Chart::Database->write_extra ('', 'yahoo-daily-cookies', undef);
632             redo SYMBOL;
633             }
634             if ($resp->code == 404) {
635             print "Yahoo $symbol does not exist";
636             next SYMBOL;
637             }
638             $parse->($symbol,$resp,$h, $hi_tdate);
639             }
640             ### $h
641             App::Chart::Download::write_daily_group ($h);
642             }
643             }
644              
645             sub daily_parse {
646             my ($symbol, $resp, $h, $hi_tdate) = @_;
647             my @data = ();
648             $h->{'data'} = \@data;
649             my $hi_tdate_iso = App::Chart::tdate_to_iso($hi_tdate);
650              
651             my $body = $resp->decoded_content (raise_error => 1);
652             my @line_list = App::Chart::Download::split_lines($body);
653              
654             unless ($line_list[0] =~ /^Date,Open,High,Low,Close,Adj Close,Volume/) {
655             die "Yahoo: unrecognised daily data headings: " . $line_list[0];
656             }
657             shift @line_list;
658              
659             foreach my $line (@line_list) {
660             my ($date, $open, $high, $low, $close, $adj_volume, $volume)
661             = split (/,/, $line);
662              
663             $date = daily_date_to_iso ($date);
664             if ($date gt $hi_tdate_iso) {
665             # Sep 2017: There's a daily data record during the trading day, but
666             # want to write the database only at the end of trading.
667             next;
668             }
669              
670             # Sep 2017 have seen "null,null,null,...", maybe for non-trading days
671             foreach my $field ($open, $high, $low, $close, $adj_volume, $volume) {
672             if ($field eq 'null') {
673             $field = undef;
674             }
675             }
676              
677             if ($index_pred->match($symbol)) {
678             # In the past indexes which not calculated intraday had
679             # open==high==low==close and volume==0, eg. ^WIL5. Use the close
680             # alone in this case, with the effect of drawing line segments instead
681             # of OHLC or Candle figures with no range.
682              
683             if (defined $open && defined $high && defined $low && defined $close
684             && $open == $high && $high == $low && $low == $close && $volume == 0){
685             $open = undef;
686             $high = undef;
687             $low = undef;
688             }
689              
690             } else {
691             # In the past shares with no trades had volume==0,
692             # open==low==close==bid price, and high==offer price, from some time
693             # during the day, maybe the end of day. Zap all the prices in this
694             # case.
695             #
696             # For a public holiday it might be good to zap the volume to undef
697             # too, but don't have anything to distinguish holiday, suspension,
698             # delisting vs just no trades.
699             #
700             # On the ASX when shares are suspended the bid/offer can be crossed as
701             # usual for pre-open auction, and this gives high<low. For a part-day
702             # suspension then can have volume!=0 in this case too. Don't want to
703             # show a high<low, so massage high/low to open/close range if the high
704             # looks like a crossed offer.
705              
706             if (defined $high && defined $low && $high < $low) {
707             $high = App::Chart::max_maybe ($open, $close);
708             $low = App::Chart::min_maybe ($open, $close);
709             }
710              
711             if (defined $open && defined $low && defined $close && defined $volume
712             && $open == $low && $low == $close && $volume == 0) {
713             $open = undef;
714             $high = undef;
715             $low = undef;
716             $close = undef;
717             }
718             }
719              
720             push @data, { symbol => $symbol,
721             date => $date,
722             open => crunch_trailing_nines($open),
723             high => crunch_trailing_nines($high),
724             low => crunch_trailing_nines($low),
725             close => crunch_trailing_nines($close),
726             volume => $volume };
727             }
728             return $h;
729             }
730             sub daily_parse_div {
731             my ($symbol, $resp, $h) = @_;
732             my @dividends = ();
733             $h->{'dividends'} = \@dividends;
734              
735             my $body = $resp->decoded_content (raise_error => 1);
736             my @line_list = App::Chart::Download::split_lines($body);
737              
738             # Date,Dividends
739             # 2015-11-04,1.4143
740             # 2016-05-17,1.41428
741             # 2017-05-16,1.4143
742             # 2016-11-03,1.4143
743              
744             unless ($line_list[0] =~ /^Date,Dividends/) {
745             warn "Yahoo: unrecognised dividend headings: " . $line_list[0];
746             return;
747             }
748             shift @line_list;
749              
750             foreach my $line (@line_list) {
751             my ($date, $amount) = split (/,/, $line);
752              
753             push @dividends, { symbol => $symbol,
754             ex_date => daily_date_to_iso ($date),
755             amount => $amount };
756             }
757             return $h;
758             }
759             sub daily_parse_split {
760             my ($symbol, $resp, $h) = @_;
761             my @splits = ();
762             $h->{'splits'} = \@splits;
763              
764             my $body = $resp->decoded_content (raise_error => 1);
765             my @line_list = App::Chart::Download::split_lines($body);
766              
767             # GXY.AX split so $10 shares become $2
768             # Date,Stock Splits
769             # 2017-05-22,1/5
770              
771             unless ($line_list[0] =~ /^Date,Stock Splits/) {
772             warn "Yahoo: unrecognised split headings: " . $line_list[0];
773             return;
774             }
775             shift @line_list;
776              
777             foreach my $line (@line_list) {
778             my ($date, $ratio) = split (/,/, $line);
779             my ($old, $new) = split m{/}, $ratio;
780              
781             push @splits, { symbol => $symbol,
782             date => daily_date_to_iso ($date),
783             new => $new,
784             old => $old };
785             }
786             return $h;
787             }
788              
789             # $str is a string like "30.299999"
790             # Return it with trailing 9s turned into trailing 0s.
791             sub crunch_trailing_nines {
792             my ($str) = @_;
793             if (defined $str) {
794             if ($str =~ /(.*)\.(...9+)$/) {
795             return decimal_add_low($str,1);
796             }
797             if ($str =~ /(.*)\.(....*01)$/) {
798             return decimal_add_low($str,-1);
799             }
800             }
801             return $str;
802             }
803             sub decimal_add_low {
804             my ($str, $add) = @_;
805             $str =~ /(.*)\.(.+)$/ or return $str+$add;
806             my $pre = $1;
807             my $post = $2;
808             $str = $pre * 10**length($post) + $post + $add;
809             substr($str, -length($post),0, '.');
810             return $str;
811             }
812              
813             # return a hashref
814             # { cookies => string, # in format HTTP::Cookies ->as_string()
815             # crumb => string
816             # }
817             #
818             # If no such $symbol then return undef;
819             #
820             # Any $symbol which exists is good enough to get a crumb for all later use.
821             # Could hard-code something likely here, but better to go from the symbol
822             # which is wanted.
823             #
824             sub daily_cookie_data {
825             my ($symbol) = @_;
826             require App::Chart::Pagebits;
827             $symbol = URI::Escape::uri_escape($symbol);
828             return App::Chart::Pagebits::get
829             (name => __('Yahoo daily cookie'),
830             url => "https://finance.yahoo.com/quote/$symbol/history?p=$symbol",
831             key => 'yahoo-daily-cookies',
832             freq_days => 3,
833             parse => \&daily_cookie_parse,
834             allow_404 => 1);
835             }
836             sub daily_cookie_parse {
837             my ($content, $resp) = @_;
838              
839             # script like, with backslash escaping on "\uXXXX"
840             #"CrumbStore":{"crumb":"hdDX\u002FHGsZ0Q"}
841             #
842             $content =~ /"CrumbStore":\{"crumb":"([^"]*)"}/
843             or die "Yahoo daily data: CrumbStore not found";
844             my $crumb = App::Chart::Yahoo::javascript_string_unquote($1);
845              
846             # header like
847             # Set-Cookie: B=fab5sl9cqn2rd&b=3&s=i3; expires=Sun, 03-Sep-2018 04:56:13 GMT; path=/; domain=.yahoo.com
848             #
849             # Expiry time is +1 year, but dunno if would really work that long.
850             #
851             require HTTP::Cookies;
852             my $jar = HTTP::Cookies->new;
853             $jar->extract_cookies($resp);
854             my $cookies_str = $jar->as_string;
855              
856             App::Chart::Download::verbose_message ("Yahoo new crumb $crumb\n"
857             . $cookies_str);
858             return { crumb => $crumb,
859             cookies => $cookies_str };
860             }
861              
862             # return tdate for a date STR from historical data
863             # "2005-03-07" AGK.AX seen in jan07, maybe transient
864             # "20-Aug-02" past format
865             #
866             sub daily_date_to_iso {
867             my ($str) = @_;
868             if ($str =~ /[A-Za-z]/) {
869             return App::Chart::Download::Decode_Date_EU_to_iso ($str); # dmy
870             } else {
871             return $str;
872             }
873             }
874              
875             # Return seconds since 00:00:00, 1 Jan 1970 GMT.
876             sub tdate_to_unix {
877             my ($tdate) = @_;
878             my ($year, $month, $day) = App::Chart::tdate_to_ymd ($tdate);
879             require Time::Local;
880             return Time::Local::timegm (0, 0, 0, $day, $month-1, $year-1900);
881             }
882              
883             # $str is a string from previous HTTP::Cookies ->as_string()
884             # Return a new HTTP::Cookies object with that content.
885             sub http_cookies_from_string {
886             my ($str) = @_;
887             require File::Temp;
888             my $fh = File::Temp->new (TEMPLATE => 'chart-XXXXXX',
889             TMPDIR => 1);
890             print $fh "#LWP-Cookies-1.0\n", $str or die;
891             close $fh or die;
892             require HTTP::Cookies;
893             my $jar = HTTP::Cookies->new;
894             $jar->load($fh->filename);
895             return $jar;
896             }
897              
898              
899             #-----------------------------------------------------------------------------
900             # stock info
901             #
902             # Eg. http://download.finance.yahoo.com/d?f=snxc4qr1d&s=TLS.AX
903              
904             App::Chart::DownloadHandler->new
905             (name => __('Yahoo info'),
906             key => 'Yahoo-info',
907             pred => $download_pred,
908             proc => \&info_download,
909             recheck_days => 7,
910             max_symbols => MAX_QUOTES);
911              
912             sub info_download {
913             my ($symbol_list) = @_;
914              
915             App::Chart::Download::status
916             (__x('Yahoo info {symbolrange}',
917             symbolrange =>
918             App::Chart::Download::symbol_range_string ($symbol_list)));
919              
920             my $url = 'http://download.finance.yahoo.com/d?f=snxc4qr1d&s='
921             . join (',', map { URI::Escape::uri_escape($_) } @$symbol_list);
922             my $resp = App::Chart::Download->get ($url);
923             my $h = info_parse($resp);
924             $h->{'recheck_list'} = $symbol_list;
925             App::Chart::Download::write_daily_group ($h);
926             }
927              
928             sub info_parse {
929             my ($resp) = @_;
930              
931             my $content = $resp->decoded_content (raise_error => 1);
932             if (DEBUG >= 2) { print "Yahoo info:\n$content\n"; }
933              
934             my @info;
935             my @dividends;
936             my $h = { source => __PACKAGE__,
937             info => \@info,
938             dividends => \@dividends };
939              
940             require Text::CSV_XS;
941             my $csv = Text::CSV_XS->new;
942              
943             foreach my $line (App::Chart::Download::split_lines ($content)) {
944             $csv->parse($line);
945             my ($symbol, $name, $exchange, $currency, $ex_date, $pay_date, $amount)
946             = $csv->fields();
947              
948             $ex_date = quote_parse_div_date ($ex_date);
949             $pay_date = quote_parse_div_date ($pay_date);
950              
951             push @info, { symbol => $symbol,
952             name => $name,
953             currency => $currency,
954             exchange => $exchange };
955             # circa 2015 the "d" dividend amount field is "N/A" when after the
956             # dividend payment (with "r1" pay date "N/A" too)
957             if ($ex_date && $amount ne 'N/A' && $amount != 0) {
958             push @dividends, { symbol => $symbol,
959             ex_date => $ex_date,
960             pay_date => $pay_date,
961             amount => $amount };
962             }
963             }
964             return $h;
965             }
966              
967              
968             #------------------------------------------------------------------------------
969             # undo javascript string backslash quoting in STR, per
970             #
971             # https://developer.mozilla.org/en/JavaScript/Guide/Values,_Variables,_and_Literals#String_Literals
972             #
973             # Encode::JavaScript::UCS does \u, but not the rest
974             #
975             # cf Java as such not quite the same:
976             # unicode: http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#100850
977             # strings: http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089
978             #
979             my %javascript_backslash = ('b' => "\b", # backspace
980             'f' => "\f", # formfeed
981             'n' => "\n", # newline
982             'r' => "\r",
983             't' => "\t", # tab
984             'v' => "\013", # vertical tab
985             );
986             sub javascript_string_unquote {
987             my ($str) = @_;
988             $str =~ s{\\(?:
989             ((?:[0-3]?[0-7])?[0-7]) # $1 \377 octal latin-1
990             |x([0-9a-fA-F]{2}) # $2 \xFF hex latin-1
991             |u([0-9a-fA-F]{4}) # $3 \uFFFF hex unicode
992             |(.) # $4 \n etc escapes
993             )
994             }{
995             (defined $1 ? chr(oct($1))
996             : defined $4 ? ($javascript_backslash{$4} || $4)
997             : chr(hex($2||$3))) # \x,\u hex
998             }egx;
999             return $str;
1000             }
1001              
1002             #------------------------------------------------------------------------------
1003             1;
1004             __END__