importing csv file into sqlite

James Mills prologic at shortcircuit.net.au
Thu Dec 18 01:17:47 EST 2008


On Thu, Dec 18, 2008 at 3:58 PM, klia <alwaseem307ster at yahoo.com> wrote:
> hey guys, i have a hug .csv file which i need to insert it into sqlite
> database using python.
> my csv data looks like this
> Birthday2,12/5/2008,HTC,this is my birthday
> Sea,12/3/2008,kodak,sea
> birthday4,14/3/2009,samsung,birthday
> love,17/4/2009,SONY,view of island
>
> can any one give me a head start codes.

How big ? When size is important in data
processing, you should _never_ try to
load it all up at once. Use filters...

Here's a head start:

$ csv2sql.py mydata.csv | sqlite3 mydatabase.db

Here's the source to my csv2sql.py tool (1):

------------------------------------------------------------
#!/usr/bin/env python

# Module:   csv2sql
# Date:     14th September 2008
# Author:   James Mills, prologic at shortcircuit dot net dot au

"""csv2sql

Tool to convert CSV data files into SQL statements that
can be used to create SQL tables. Each line of text in
the file is read, parsed and converted to SQL and output
to stdout (which can be piped).
"""

__desc__ = "CSV to SQL Tool"
__version__ = "0.2"
__author__ = "James Mills"
__email__ = "%s, prologic at shortcircuit dot net dot au" % __author__
__url__ = "http://shortcircuit.net.au/~prologic/"
__copyright__ = "CopyRight (C) 2008 by %s" % __author__
__license__ = "GPL"

import os
import csv
import optparse
from cStringIO import StringIO

USAGE = "%prog [options] <file>"
VERSION = "%prog v" + __version__

def parse_options():
  """parse_options() -> opts, args

  Parse any command-line options given returning both
  the parsed options and arguments.
  """

  parser = optparse.OptionParser(usage=USAGE, version=VERSION)

  parser.add_option("-t", "--table",
        action="store", default=None, dest="table",
        help="Specify table name")

  parser.add_option("-f", "--fields",
        action="store", default=None, dest="fields",
        help="Specify a list of fields")

  opts, args = parser.parse_args()

  if len(args) < 1:
     parser.print_help()
     raise SystemExit, 1

  return opts, args

def mkBuffer(fd):
  buffer = StringIO()
  buffer.write(fd.read())
  buffer.seek(0)
  fd.close()
  return buffer

def readCSV(file):
  if type(file) == str:
     fd = open(file, "rU")
  else:
     fd = file

  fd = mkBuffer(fd)

  sniffer = csv.Sniffer()
  dialect = sniffer.sniff(fd.readline())
  fd.seek(0)

  reader = csv.reader(fd, dialect)
  for line in reader:
     yield line

def main():
  opts, args = parse_options()

  file = args[0]

  if file == "-":
     fd = sys.stdin
     if opts.table is None:
        print "ERROR: No table specified and stdin used."
        raise SystemExit(1)
  else:
     fd = open(file, "rU")
     if opts.table is None:
        table = os.path.splitext(file)[0]
     else:
        table = opts.table

  for line in readCSV(fd):
     if opts.fields:
        fields = [x.strip() for x in opts.fields.split(",")]
        fields = "(%s)" % ",".join(fields)
     else:
        fields = ""
     values = ",".join(["\"%s\"" % x for x in line])
     print "INSERT INTO %s %s VALUES (%s);" % (table, fields, values)

if __name__ == "__main__":
  main()

----------------------------------------------------------------

Hope this helps,

Here's a small test of using it:

jmills at atomant:~$ python
Python 2.5.2 (r252:60911, Oct  5 2008, 19:24:49)
[GCC 4.3.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> fd = open("test.csv", "w")
>>> for i in xrange(1000000):
...     fd.write("foo,bar,%d\n" % i)
...
>>> fd.close()
>>>
jmills at atomant:~$ du -h test.csv
15M     test.csv
jmills at atomant:~$ wc -l test.csv
1000000 test.csv
jmills at atomant:~$ time csv2sql.py test.csv > test.sql

real    0m14.303s
user    0m8.877s
sys     0m0.676s

cheers
James

References:

1. http://hg.softcircuit.com.au/projects/tools/



More information about the Python-list mailing list