77import tempfile
88import datetime as dt
99import time
10+ import csv
1011
1112from collections import defaultdict
1213
@@ -45,7 +46,7 @@ def DataReader(name, data_source=None, start=None, end=None,
4546 the name of the dataset. Some data sources (yahoo, google, fred) will
4647 accept a list of names.
4748 data_source: str
48- the data source ("yahoo", "google", "fred", or "ff")
49+ the data source ("yahoo", "yahoo-actions", " google", "fred", or "ff")
4950 start : {datetime, None}
5051 left boundary for range (defaults to 1/1/2010)
5152 end : {datetime, None}
@@ -57,6 +58,9 @@ def DataReader(name, data_source=None, start=None, end=None,
5758 # Data from Yahoo! Finance
5859 gs = DataReader("GS", "yahoo")
5960
61+ # Corporate Actions (Dividend and Split Data) with ex-dates from Yahoo! Finance
62+ gs = DataReader("GS", "yahoo-actions")
63+
6064 # Data from Google Finance
6165 aapl = DataReader("AAPL", "google")
6266
@@ -75,6 +79,9 @@ def DataReader(name, data_source=None, start=None, end=None,
7579 return get_data_yahoo (symbols = name , start = start , end = end ,
7680 adjust_price = False , chunksize = 25 ,
7781 retry_count = retry_count , pause = pause )
82+ elif data_source == "yahoo-actions" :
83+ return get_data_yahoo_actions (symbol = name , start = start , end = end ,
84+ retry_count = retry_count , pause = pause )
7885 elif data_source == "google" :
7986 return get_data_google (symbols = name , start = start , end = end ,
8087 adjust_price = False , chunksize = 25 ,
@@ -423,6 +430,81 @@ def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3,
423430 return _get_data_from (symbols , start , end , interval , retry_count , pause ,
424431 adjust_price , ret_index , chunksize , 'yahoo' )
425432
# Base endpoint queried by get_data_yahoo_actions; the query string is
# appended directly after the trailing '?'.
_HISTORICAL_YAHOO_ACTIONS_URL = 'http://ichart.finance.yahoo.com/x?'


def get_data_yahoo_actions(symbol, start=None, end=None, retry_count=3,
                           pause=0.001):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) for a symbol, over date range, start to end. All dates in the
    resulting DataFrame correspond with dividend and stock split ex-dates.

    Parameters
    ----------
    symbol : string
        A single stock symbol (ticker).
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to attempt the query request.
    pause : float, default 0.001
        Time, in seconds, slept before every request attempt (including
        the first one).

    Returns
    -------
    DataFrame
        Indexed by the action date, with the columns 'action' (either
        'DIVIDEND' or 'SPLIT') and 'value' (the dividend amount as a float,
        or the split ratio expressed as a fraction). The two columns are
        present even when no actions were found in the date range.

    Raises
    ------
    IOError
        If every one of the `retry_count` attempts failed with one of the
        errors in `_network_error_classes`.
    """

    start, end = _sanitize_dates(start, end)

    # The month parameters (a, d) are sent as month - 1.
    url = (_HISTORICAL_YAHOO_ACTIONS_URL + 's=%s' % symbol +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v')

    for _ in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            # Best-effort retry: a failed request just moves on to the next
            # attempt; the IOError below reports that all attempts failed.
            continue

        actions_index = []
        actions_entries = []

        for line in csv.reader(StringIO(bytes_to_str(lines))):
            # Ignore lines that aren't dividends or splits (Yahoo
            # add a bunch of irrelevant fields.)
            if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
                continue

            action, date, value = line
            if action == 'DIVIDEND':
                amount = float(value)
            elif ':' in value:
                # Convert the split ratio to a fraction. For example a
                # 4:1 split expressed as a fraction is 1/4 = 0.25.
                denominator, numerator = value.split(':', 1)
                amount = float(numerator) / float(denominator)
            else:
                # A SPLIT row without a 'a:b' ratio cannot be converted
                # and is skipped.
                continue

            actions_index.append(to_datetime(date))
            actions_entries.append({'action': action, 'value': amount})

        # Name the columns explicitly so that an empty result still exposes
        # 'action' and 'value' instead of a column-less frame.
        return DataFrame(actions_entries, index=actions_index,
                         columns=['action', 'value'])

    raise IOError("after %d tries, Yahoo! did not "
                  "return a 200 for url %r" % (retry_count, url))
507+
426508
427509def get_data_google (symbols = None , start = None , end = None , retry_count = 3 ,
428510 pause = 0.001 , adjust_price = False , ret_index = False ,
0 commit comments