fun with stock quotes

Will Ware wware at world.std.com
Fri Dec 31 23:10:18 EST 1999


I recently discovered that Yahoo's stock-quote-serving web
page is now offering historical stock data in machine-friendly
form. The following scripts will pull in a bunch of stock data
and format it as a C module. Then you can back-test any silly
investment algorithms you come up with. Nerdy greedy fun for
the whole family.

--------------------- begin ------------------------------------
#!/bin/sh
# This is a shell archive (produced by shar 3.50)
# To extract the files from this archive, save it to a file, remove
# everything above the "!/bin/sh" line above, and type "sh file_name".
#
# made 01/01/2000 04:02 UTC by wware at world
# Source directory /tmp/var-tmp/wware/foo
#
# existing files will NOT be overwritten unless -c is specified
#
# This shar contains:
# length  mode       name
# ------ ---------- ------------------------------------------
#    397 -r--r--r-- Makefile
#   3182 -rw------- buildc.py
#   6856 -r--r--r-- stockp.c
#    194 -r--r--r-- stocks.h
#
# ============= Makefile ==============
if test -f 'Makefile' -a X"$1" != X"-c"; then
	echo 'x - skipping Makefile (File already exists)'
else
echo 'x - extracting Makefile (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'Makefile' &&
# Builds stockp.so, the Python extension module, from stockp.c and
# stocks.c.  stocks.c is not shipped in this archive; presumably it is
# the saved output of buildc.py -- confirm before relying on that.
CFLAGS = -Wall -O2 -I/usr/include/python1.5
all: stockp.so
X
.SUFFIXES: .c .o .so
X
.c.o:
X	gcc $(CFLAGS) -c -o $@ $(@:.o=.c)
X
stockp.so: stockp.o stocks.o
X	gcc $(CFLAGS) -shared -o stockp.so stocks.o stockp.o
X
stockp.o: stockp.c stocks.h
stocks.o: stocks.c stocks.h
X
# Remove build products only.  A "stockp*" glob here would also match
# stockp.c and delete hand-written source; *.o and *.so already cover
# stockp.o and stockp.so.
clean:
X	rm -f *.o *.so
X
# NOTE(review): stocks/foo.py is not part of this archive (it ships
# buildc.py instead) -- confirm which script the tarball should carry.
tgz:
X	(cd ..; tar cf - stocks/*.c stocks/Makefile stocks/*.h \
X		stocks/foo.py | gzip > stockc.tgz)
SHAR_EOF
chmod 0444 Makefile ||
echo 'restore of Makefile failed'
Wc_c="`wc -c < 'Makefile'`"
test 397 -eq "$Wc_c" ||
	echo 'Makefile: original size 397, current size' "$Wc_c"
fi
# ============= buildc.py ==============
if test -f 'buildc.py' -a X"$1" != X"-c"; then
	echo 'x - skipping buildc.py (File already exists)'
else
echo 'x - extracting buildc.py (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'buildc.py' &&
'''The other day I discovered that quote.yahoo.com now offers
historical stock data. Delightful! The obvious next thing was to write
a Python class that fetches the historical data off the web and tries
to make some money with it. My feeble attempt follows. <NOTA BENE>This
is not an endorsement for any investment style or particular
investment. Your money, and your innate God-given stupidity, are
entirely your own.</NOTA BENE>'''
X
import urllib, string, normalDate, math, regex, sys, os
X
# Ticker symbols to download, grouped by rough category.  Each symbol
# is passed to fetchRange() and also becomes part of a C identifier
# ("<symbol>_dat") in the generated output.
masterList = [
X    # tech stocks
X    'MSFT', 'AMZN', 'INTC', 'RHAT', 'DELL', 'GTW',
X    'CSCO', 'AAPL', 'AOL', 'COMS', 'CPQ', 'LCOS', 'LU',
X    'ORCL', 'SUNW', 'TXN', 'YHOO',
X    # single-letters and blue chips
X    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
X    'J', 'K', 'L', 'N', 'O', 'P', 'R',
X    'U', 'W', 'X', 'Y', 'Z',
X    # Dow Jones stocks
X    'AA', 'AXP', 'T', 'BA', 'CAT', 'CHV', 'KO',
X    'DIS', 'DD', 'EK', 'GE', 'GM', 'GT', 'HWP',
X    'IBM', 'IP', 'JNJ', 'MCD', 'MRK', 'MMM', 'JPM',
X    'MO', 'PG', 'S', 'TRV', 'UK', 'WMT',
X    # biotech
X    'AFFX', 'BIOM', 'CRA', 'ENMD', 'HGSI',
X    'MATX', 'MLNM', 'RGEN', 'SAFS', 'DNA', 'GENE',
X    'INCY', 'GLGC', 'PFE',
X    'AVXT', 'CTIC', 'CRXA', 'GZMO', 'IDPH',
X    'ILXO', 'ISIP', 'RZYM', 'SUPG', 'TGEN', 'VICL',
X    # retailers, general interest
X    'BBY', 'COST', 'UPS', 'HD',
X    ]
X
# Quick-test mode: keep only the first 12 symbols ('MSFT' through
# 'LCOS').  Remove the next line to process the whole list.
masterList = masterList[:12]
X
def fetchRange(symbol, first, last):
X    '''go to Yahoo and fetch a range of historical stock
X    data, where 'first' and 'last' are normalDates'''
X    key = (first, last)
X    url = 'http://chart.yahoo.com/table.csv?s=%s' \
X          '&a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&g=d&q=q&y=0&z=%s' \
X          '&x=.csv' % (symbol,
X                       first.month(), first.day(), first.year(),
X                       last.month(), last.day(), last.year(),
X                       symbol)
X    lst = [ ]
X    for x in urllib.URLopener().open(url).readlines()[1:]:
X        x = string.split(x, ',')
X        date, closingPrice = x[0], x[4]
X        date = string.split(date, '-')
X        date[0] = eval(date[0])
X        date[2] = eval(date[2])
X        closingPrice = eval(closingPrice)
X        if date[2] > 90: year = date[2] + 1900
X        else: year = date[2] + 2000
X        month = ['Jan', 'Feb', 'Mar',
X                 'Apr', 'May', 'Jun',
X                 'Jul', 'Aug', 'Sep',
X                 'Oct', 'Nov', 'Dec'].index(date[1]) + 1
X        date = normalDate.ND(year * 10000 +
X                             month * 100 + date[0])
X        lst.append(date, closingPrice)
X    lst.reverse()  # Yahoo gives reverse time order
X    if len(lst) == 0:
X        raise 'ouch'
X    return lst
X
start = normalDate.ND(19950101)
finish = normalDate.ND()   # today
X
print '#include "Python.h"'
print '#include "stocks.h"'
print
X
sizes = { }
for s in masterList:
X    L = fetchRange(s, start, finish)
X    print 'static struct closing_price %s_dat[] = {' % s
X    sizes[s] = len(L)
X    for (x, y) in L:
X        print '  {%d, %f},' % (x, y)
X    print '};'
X    print
X
print 'struct stock_data_entry stock_data[] = {'
for s in masterList:
X    print '  {"%s", %s_dat, %d},' % (s, s, sizes[s])
print '  {NULL, NULL, 0}'
print '};'
SHAR_EOF
chmod 0600 buildc.py ||
echo 'restore of buildc.py failed'
Wc_c="`wc -c < 'buildc.py'`"
test 3182 -eq "$Wc_c" ||
	echo 'buildc.py: original size 3182, current size' "$Wc_c"
fi
# ============= stockp.c ==============
if test -f 'stockp.c' -a X"$1" != X"-c"; then
	echo 'x - skipping stockp.c (File already exists)'
else
echo 'x - extracting stockp.c (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'stockp.c' &&
/* Python code for interfacing the big honkin' historical stock data library */
X
#include <string.h>
#include <math.h>
#include "Python.h"
#include "stocks.h"
X
static PyObject *StockpError;
X
/* symbolList(): return a new Python list holding the name of every
X * entry in stock_data, in table order.  Takes no arguments.
X * Returns NULL with an exception set on failure.
X */
static PyObject * symbolList(PyObject *self, PyObject *args)
{
X  struct stock_data_entry *p;
X  PyObject *q, *name;
X  if (!PyArg_ParseTuple(args, ""))
X    return NULL;
X  q = PyList_New(0);
X  if (q == NULL)
X    return NULL;
X  for (p = stock_data; p->name != NULL; p++)
X    {
X      /* PyList_Append takes its own reference to the item, so the
X       * one returned by Py_BuildValue must be released here or
X       * every name string is leaked.
X       */
X      name = Py_BuildValue("s", p->name);
X      if (name == NULL || PyList_Append(q, name) != 0)
X        {
X          Py_XDECREF(name);
X          Py_DECREF(q);
X          return NULL;
X        }
X      Py_DECREF(name);
X    }
X  return q;
}
X
/* Maybe it makes sense to rewrite "lookup" as a __getitem__ method.
X * That could be done in a Python wrapper, don't bother here.
X */
X
/* Return the newest entry of array[0..arraysize-1] whose date is on
X * or before 'date', so that a weekend or holiday resolves to the
X * preceding trading day and a date later than the final entry
X * resolves to that final entry.  The array must be sorted by
X * ascending date.  Returns NULL when the array is empty or every
X * entry is later than 'date' (stock didn't exist at that date).
X */
static struct closing_price * last_price_before(struct closing_price *array,
X                                                int arraysize,
X                                                int date)
{
X  struct closing_price *q;
X  /* Walk from newest to oldest; the first entry that is not later
X   * than 'date' is the answer.  q always points one past the
X   * candidate, so an empty array never forms array - 1.
X   */
X  for (q = array + arraysize; q != array; q--)
X    if ((q-1)->date <= (unsigned int) date)
X      return q - 1;
X  return NULL;
}
X
/* lookup(symbol, nd): return, as a Python float, the price of the
X * entry that last_price_before() selects for 'symbol' and the date
X * held in nd.normalDate.  Raises stockp.error with "no data for that
X * date" when no entry is selected, or "no such stock symbol" when
X * the symbol is not in stock_data.
X */
static PyObject * lookup(PyObject *self, PyObject *args)
{
X  char *symbol;
X  int date;
X  struct stock_data_entry *p;
X  struct closing_price *q;
X  PyObject *nd;   /* this is a normalDate */
X  PyObject *attr;
X  if (!PyArg_ParseTuple(args, "sO", &symbol, &nd))
X    return NULL;
X  /* PyObject_GetAttrString returns a new reference, or NULL with an
X   * exception set; release it once the integer has been extracted.
X   */
X  attr = PyObject_GetAttrString(nd, "normalDate");
X  if (attr == NULL)
X    return NULL;
X  date = PyInt_AsLong(attr);
X  Py_DECREF(attr);
X  if (date == -1 && PyErr_Occurred())
X    return NULL;   /* the attribute was not an integer */
X  for (p = stock_data; p->name != NULL; p++)
X    if (strcmp(p->name, symbol) == 0)
X      {
X        q = last_price_before(p->data, p->size, date);
X        if (q == NULL)
X          {
X            PyErr_SetString(StockpError, "no data for that date");
X            return NULL;
X          }
X        else
X          return Py_BuildValue("f", q->price);
X      }
X  PyErr_SetString(StockpError, "no such stock symbol");
X  return NULL;
}
X
/* Locate the slice of p->data that falls inside the inclusive date
X * range [first, last].  On success stores pointers to the first and
X * last matching entries and returns 0.  Error codes:
X *   1  bad range (last < first)
X *   2  range is before all stock data
X *   3  range is after all stock data
X *   4  the stock has no data at all
X * Callers only test the result against 0.  If the range falls
X * entirely inside a gap between two entries, the stored first
X * pointer ends up one past the stored last pointer.
X */
static int subRange(struct stock_data_entry *p,
X                    int first, int last,
X                    struct closing_price **first_closing_price,
X                    struct closing_price **last_closing_price)
{
X  int lo, hi;
X  struct closing_price *q = p->data;
X  /* q[p->size-1] below would read out of bounds on an empty table */
X  if (p->size <= 0)
X    return 4;
X  if (last < first)
X    return 1;   /* bad range */
X  if (last < q[0].date)
X    return 2;   /* range is before all stock data */
X  if (first > q[p->size-1].date)
X    return 3;   /* range is after all stock data */
X  /* discard anything off the end of the range */
X  if (first < q[0].date)
X    first = q[0].date;
X  if (last > q[p->size-1].date)
X    last = q[p->size-1].date;
X  /* Find first in the array. If first lands on a weekend or
X   * holiday, pick the next valid date. If you fall off the
X   * end of the array, return an error code.
X   */
X  for (lo = 0; lo < p->size && q[lo].date < first; lo++)
X    ;
X  if (lo == p->size)  /* fell off the end of the array */
X    return 3;
X  *first_closing_price = &p->data[lo];
X  /* Same search from the other end: the last entry on or before
X   * 'last'.
X   */
X  for (hi = p->size - 1; hi >= 0 && q[hi].date > last; hi--)
X    ;
X  if (hi < 0)  /* fell off the start of the array */
X    return 2;
X  *last_closing_price = &p->data[hi];
X  return 0;  /* no error */
}
X
/* fetchRange(symbol, nd1, nd2): return a new list of (date, price)
X * tuples for 'symbol' covering the slice that subRange() selects
X * for nd1.normalDate .. nd2.normalDate.  Raises stockp.error with
X * "bad range" when subRange() rejects the range, or "no such stock
X * symbol" when the symbol is not in stock_data.
X */
static PyObject * fetchRange(PyObject *self, PyObject *args)
{
X  char *symbol;
X  int first, last;
X  struct stock_data_entry *p;
X  struct closing_price *s, *t;
X  PyObject *r, *item, *attr;
X  PyObject *nd1, *nd2;
X  if (!PyArg_ParseTuple(args, "sOO", &symbol, &nd1, &nd2))
X    return NULL;
X  /* PyObject_GetAttrString returns a new reference (or NULL with
X   * an exception set); release each one after converting it.
X   */
X  attr = PyObject_GetAttrString(nd1, "normalDate");
X  if (attr == NULL)
X    return NULL;
X  first = PyInt_AsLong(attr);
X  Py_DECREF(attr);
X  if (first == -1 && PyErr_Occurred())
X    return NULL;
X  attr = PyObject_GetAttrString(nd2, "normalDate");
X  if (attr == NULL)
X    return NULL;
X  last = PyInt_AsLong(attr);
X  Py_DECREF(attr);
X  if (last == -1 && PyErr_Occurred())
X    return NULL;
X  for (p = stock_data; p->name != NULL; p++)
X    if (strcmp(p->name, symbol) == 0)
X      {
X        if (subRange(p, first, last, &s, &t) != 0)
X          {
X            PyErr_SetString(StockpError, "bad range");
X            return NULL;
X          }
X        /* build the list only after the range is known to be good,
X         * so the error return above has nothing to release
X         */
X        r = PyList_New(0);
X        if (r == NULL)
X          return NULL;
X        for ( ; s <= t; s++)
X          {
X            /* PyList_Append does not take over our reference */
X            item = Py_BuildValue("(if)", s->date, s->price);
X            if (item == NULL || PyList_Append(r, item) != 0)
X              {
X                Py_XDECREF(item);
X                Py_DECREF(r);
X                return NULL;
X              }
X            Py_DECREF(item);
X          }
X        return r;
X      }
X  PyErr_SetString(StockpError, "no such stock symbol");
X  return NULL;
}
X
/* Date calculations lifted from normalDate.py */
X
static int daysInMonthNormal[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static int daysInMonthLeapYear[] = {31,29,31,30,31,30,31,31,30,31,30,31};
X
static int isLeapYear(int year)
{
X  if ((year % 4) != 0) return 0;
X  if ((year % 100) != 0) return 1;
X  if ((year % 400) != 0) return 0;
X  return 1;
}
X
static int dateToInt(int date)
{
X  /* days since Jan 1, 1900 */
X  int year, month, day, leapAdjust, i, *p;
X  year = date / 10000;
X  month = (date - 10000 * year) / 100;
X  day = date - 10000 * year - 100 * month;
X  leapAdjust = (year + 3) / 4;
X  leapAdjust -= (year + 99 - 1600) / 100 + (year + 399 - 1600) / 400;
X  day += year * 365 + leapAdjust - 693972;
X  /* figure leap years to figure days per month */
X  if (isLeapYear(year))
X    p = daysInMonthLeapYear;
X  else
X    p = daysInMonthNormal;
X  for (i = 0; i < month - 1; i++)
X    day += p[i];
X  return day;
}
X
/* dateToInt(nd): Python wrapper that returns dateToInt() applied to
X * nd.normalDate as a Python integer.
X */
static PyObject * pyDateToInt(PyObject *self, PyObject *args)
{
X  /* this will make it possible to do regression tests on the
X   * dateToInt function, which is easily hairy enough to have
X   * a bug in it
X   */
X  int first;
X  PyObject *nd, *attr;
X  if (!PyArg_ParseTuple(args, "O", &nd))
X    return NULL;
X  /* PyObject_GetAttrString returns a new reference, or NULL with an
X   * exception set; release it once the integer has been extracted.
X   */
X  attr = PyObject_GetAttrString(nd, "normalDate");
X  if (attr == NULL)
X    return NULL;
X  first = PyInt_AsLong(attr);
X  Py_DECREF(attr);
X  if (first == -1 && PyErr_Occurred())
X    return NULL;   /* the attribute was not an integer */
X  return Py_BuildValue("i", dateToInt(first));
}
X
/* linearRegression(symbol, nd1, nd2): least-squares fit of
X * log(price) against dateToInt(date) over the slice that subRange()
X * selects for nd1.normalDate .. nd2.normalDate.
X *
X * Returns the tuple (A, B, sigma), where log(price) ~ A * day + B
X * and sigma is the root-mean-square residual of that fit.  Raises
X * stockp.error when the range is bad, when it holds fewer than two
X * usable points, or when the symbol is unknown.
X */
static PyObject * linearRegression(PyObject *self, PyObject *args)
{
X  char *symbol;
X  int first, last, N;
X  double A, B, C, D, E, F, G, denom, variance, sigma;
X  struct stock_data_entry *p;
X  struct closing_price *s, *t;
X  PyObject *nd1, *nd2, *attr;
X  if (!PyArg_ParseTuple(args, "sOO", &symbol, &nd1, &nd2))
X    return NULL;
X  /* PyObject_GetAttrString returns a new reference (or NULL with
X   * an exception set); release each one after converting it.
X   */
X  attr = PyObject_GetAttrString(nd1, "normalDate");
X  if (attr == NULL)
X    return NULL;
X  first = PyInt_AsLong(attr);
X  Py_DECREF(attr);
X  if (first == -1 && PyErr_Occurred())
X    return NULL;
X  attr = PyObject_GetAttrString(nd2, "normalDate");
X  if (attr == NULL)
X    return NULL;
X  last = PyInt_AsLong(attr);
X  Py_DECREF(attr);
X  if (last == -1 && PyErr_Occurred())
X    return NULL;
X  for (p = stock_data; p->name != NULL; p++)
X    if (strcmp(p->name, symbol) == 0)
X      {
X        if (subRange(p, first, last, &s, &t) != 0)
X          {
X            PyErr_SetString(StockpError, "bad range");
X            return NULL;
X          }
X        /* running sums: C = sum(xy), D = sum(xx), E = sum(x),
X         * F = sum(y), G = sum(yy)
X         */
X        C = D = E = F = G = 0.0;
X        N = 0;
X        for ( ; s != t + 1; s++)
X          {
X            double x, y;
X            x = (double) dateToInt(s->date);
X            y = log(s->price);
X            N++;
X            C += x * y;
X            D += x * x;
X            E += x;
X            F += y;
X            G += y * y;
X          }
X        if (N == 0)
X          {
X            PyErr_SetString(StockpError, "bad range");
X            return NULL;
X          }
X        /* With a single point (or no spread in x) the denominator
X         * is zero and the slope is undefined; report that instead
X         * of handing NaN back to the caller.
X         */
X        denom = D * N - E * E;
X        if (N < 2 || denom == 0.0)
X          {
X            PyErr_SetString(StockpError,
X                            "need at least two data points for a regression");
X            return NULL;
X          }
X        A = (N * C - E * F) / denom;
X        B = (D * F - C * E) / denom;
X        /* mean squared residual, expanded in terms of the sums;
X         * rounding can push it a hair below zero on a perfect fit
X         */
X        variance = (G + A*A*D + 2*A*B*E + B*B*N - 2*A*C - 2*B*F) / N;
X        if (variance < 0.0)
X          variance = 0.0;
X        sigma = sqrt(variance);
X        return Py_BuildValue("(ddd)", A, B, sigma);
X      }
X  PyErr_SetString(StockpError, "no such stock symbol");
X  return NULL;
}
X
X
/* Method table for the stockp module.  Every entry uses the
X * METH_VARARGS calling convention (flag value 1); the all-zero entry
X * ends the table.
X */
static PyMethodDef stockp_methods[] = {
X  {"symbolList",       symbolList,       METH_VARARGS},
X  {"lookup",           lookup,           METH_VARARGS},
X  {"fetchRange",       fetchRange,       METH_VARARGS},
X  {"dateToInt",        pyDateToInt,      METH_VARARGS},
X  {"linearRegression", linearRegression, METH_VARARGS},
X  {NULL, NULL, 0}
};
X
void initstockp(void)
{
X  PyObject *m, *d;
X  m = Py_InitModule("stockp", stockp_methods);
X  d = PyModule_GetDict(m);
X  StockpError = PyErr_NewException("stockp.error", NULL, NULL);
X  if (StockpError != NULL)
X    PyDict_SetItemString(d, "error", StockpError);
}
SHAR_EOF
chmod 0444 stockp.c ||
echo 'restore of stockp.c failed'
Wc_c="`wc -c < 'stockp.c'`"
test 6856 -eq "$Wc_c" ||
	echo 'stockp.c: original size 6856, current size' "$Wc_c"
fi
# ============= stocks.h ==============
if test -f 'stocks.h' -a X"$1" != X"-c"; then
	echo 'x - skipping stocks.h (File already exists)'
else
echo 'x - extracting stocks.h (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'stocks.h' &&
/* One data point: a stock's closing price on one date. */
struct closing_price {
X  unsigned int date;   /* packed as decimal YYYYMMDD, e.g. 19950101 */
X  float price;         /* closing price on that date */
};
X
/* All the data held for one stock symbol. */
struct stock_data_entry {
X  char *name;                   /* symbol; NULL marks the end of stock_data */
X  struct closing_price *data;   /* this symbol's closing_price array */
X  int size;                     /* number of elements in data */
};
X
/* Table of every stock, terminated by an entry whose name is NULL.
X * It is defined in a separate C file, not in this header.
X */
extern struct stock_data_entry stock_data[];
SHAR_EOF
chmod 0444 stocks.h ||
echo 'restore of stocks.h failed'
Wc_c="`wc -c < 'stocks.h'`"
test 194 -eq "$Wc_c" ||
	echo 'stocks.h: original size 194, current size' "$Wc_c"
fi
exit 0
---------------------- end -----------------------------------
-- 
 - - - - - - - - - - - - - - - - - - - - - - - -
Resistance is futile. Capacitance is efficacious.
Will Ware	email:    wware @ world.std.com



More information about the Python-list mailing list