Memory error due to big input file

sityee kong skong1 at gmail.com
Mon Jul 13 13:10:00 EDT 2009


Hi All,

I have a problem similar to one that many new Python users might encounter,
and I would really appreciate it if you could help me fix the error.
I have a big text file, more than 2 GB in size, and I get a memory error
when reading this file in. Here is my Python script; the error occurs at
the line containing self.fh.readlines().

import math
import time

class textfile:
    """Hold the contents of a text file, minus its first line.

    Attributes set by the constructor:
      name  -- the path that was passed in
      fh    -- the file object, left open and positioned at end of file
      lines -- a list holding every line after the first, newlines included
    """

    def __init__(self, fname):
        handle = open(fname)
        # The first line is read and thrown away (presumably a header row --
        # confirm against the input file).
        handle.readline()

        self.name = fname
        self.fh = handle
        # readlines() builds the whole list in memory at once, so memory use
        # grows with the size of the file.
        self.lines = handle.readlines()

# Load the input file; every line after the first is kept in a.lines.
a = textfile("/home/sservice/nfbc/GenoData/CompareCalls3.diff")

# Number of loaded rows, used as the loop bound further down.
lfile = len(a.lines)

def myfun(snp, start, end):
    """Write one summary line for rows start..end (inclusive) of a.lines.

    Each row is split on whitespace and its 4th and 5th fields are compared
    with the marker "0/0":
      * 4th is "0/0", 5th is not  -> counted as NEWmiss
      * 4th is not "0/0", 5th is  -> counted as OLDmiss
      * neither is "0/0"          -> counted as DIFF
      * both are "0/0"            -> not counted

    The line "<snp> <NEWmiss> <OLDmiss> <DIFF>" is written to the
    module-level file object `result`.

    snp   -- label written at the start of the line (must be a string)
    start -- index of the first row to examine
    end   -- index of the last row to examine

    A row with fewer than five fields raises IndexError.
    """
    marker = "0/0"
    new_missing = old_missing = differing = 0

    for line in a.lines[start:end + 1]:
        fields = line.split()
        fourth_missing = fields[3] == marker
        fifth_missing = fields[4] == marker

        if fourth_missing and fifth_missing:
            # Both fields carry the marker: none of the counters applies.
            continue
        if fourth_missing:
            new_missing += 1
        elif fifth_missing:
            old_missing += 1
        else:
            differing += 1

    counts = [str(new_missing), str(old_missing), str(differing)]
    result.write(" ".join([snp] + counts) + "\n")

# Write one summary row per SNP.  Adjacent rows of a.lines that share the same
# first column form one group; myfun() writes the counts for the inclusive
# index range [start, end] of each group to `result`.
#
# The `with` block closes the output file even if myfun() raises on a
# malformed row.
with open("Summary_noLoop_diff3.txt", "w") as result:
    result.write("SNP NEWmiss OLDmiss DIFF\n")

    start = 0  # index of the first row of the group currently being scanned
    snp = 0    # number of groups written so far

    # Guard against an input with no data rows: nothing to summarise.
    if lfile > 0:
        # Each row is split once and only the key of the current group is
        # remembered, instead of re-splitting the previous row every time.
        current = a.lines[0].split()[0]
        for i in range(1, lfile):
            key = a.lines[i].split()[0]
            if key == current:
                continue
            # Row i opens a new group, so the previous group ended at i - 1.
            end = i - 1
            myfun(current, start, end)
            snp += 1
            start = i
            current = key

        # Flush the last group after the scan.  Doing this unconditionally
        # also covers an input with exactly one data row, which would
        # otherwise never reach a myfun() call and be dropped from the output.
        end = lfile - 1
        myfun(current, start, end)
        snp += 1

  sincerely, phoebe
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20090713/79fc0e80/attachment.html>


More information about the Python-list mailing list