Creating 50K text files in python

Peter Otten __peter__ at web.de
Wed Mar 18 10:08:27 EDT 2009


venutaurus539 at gmail.com wrote:

> On Mar 18, 6:35 pm, Peter Otten <__pete... at web.de> wrote:
>> venutaurus... at gmail.com wrote:
>> > Hello all,
>> > I've an application where I need to create 50K files spread
>> > uniformly across 50 folders in python. The content can be the name of
>> > file itself repeated 10 times. I wrote code using normal for loops
>> > but it is taking hours for that. Can someone please share
>> > the code for it using multithreading? As I am new to Python, I know
>> > little about threading concepts.
>>
>> > This is my requirement:
>>
>> > C:\TestFolder....
>> > That folder contains 5 Folders.. Folder1, Folder2, Folder3.....
>> > Folder5
>> > Each folder in turn contains 10 folders:
>> > and Each of those folder contains 1000 text files.
>>
>> > Please let me know if you are not clear.
>>
>> > Thank you,
>> > Venu Madhav.
>>
>> I've just tried it, creating the 50,000 text files took 17 seconds on my
>> not very fast machine. Python is certainly not the bottleneck here.
>>
>> Peter
> 
> Really...!!!! I just can't believe it. I am running my script on an
> INTEL Core2duo CPU machine with 2GB of RAM. I've started it two hours
> back, but still it is running. This is what my code looks like
> 
> 
> def createFiles(path):
>     m.write(strftime("%Y-%m-%d %H:%M:%S") +" Creating files in the
> folder "+path+"\n")
>     global c
>     global d
>     os.chdir (path)
>     k = 1
>     for k in range (1,1001):
>         p = "%.2d"%(k)
>         FName = "TextFile"+c+"_"+d+"_"+p+".txt"
>         l =1
>         for l in range(1 , 11):
>             os.system ("\"echo "+FName+" >> "+FName+"\"")
>             l = l +1
>         k = k+1
> 
> 
> 
> MainPath = "C:\\Many_50000_1KB"
> try:
>     os.mkdir (MainPath)
>     m.write(strftime("%Y-%m-%d %H:%M:%S") +" Created the base directory
> \n")
> except:
>     m.write(strftime("%Y-%m-%d %H:%M:%S") +" base directory already
> exists\n")
> os.chdir (MainPath)
> for i in range (1 , 6):
>     j = 1
>     c = "%.2d"%(i)
>     FolderName ="Folder"+c
>     try:
>         os.mkdir (FolderName)
>         m.write(strftime("%Y-%m-%d %H:%M:%S") +" Created the folder
> "+FolderName+"\n")
>     except:
>         m.write(strftime("%Y-%m-%d %H:%M:%S") +" Folder "+FolderName+"
> already exists \n")
>     os.chdir (FolderName)
>     path = os.getcwd ()
>     #createFiles(path)
>     for j in range ( 1 , 11):
>         d = "%.2d"%(j)
>         FolderName = "Folder"+c+"_"+d
>         try:
>             os.mkdir (FolderName)
>             m.write(strftime("%Y-%m-%d %H:%M:%S") +" Created the
> folder "+FolderName+"\n")
>         except:
>             m.write(strftime("%Y-%m-%d %H:%M:%S") +" the folder
> "+FolderName+" exists \n")
>         os.chdir (FolderName)
>         path = os.getcwd ()
>         createFiles(path)
>         os.chdir ("..")
>         j = j + 1
>     os.chdir ("..")
>     i = i + 1
> 
> Can you please let me know where I have to modify it to make it
> faster?

Multiple (!) os.system() calls to create a file are certainly a bad idea:
each call starts a separate shell process, and your script makes ten of
them per file.
Personally I don't use os.chdir() because I try to avoid global state when
possible.

Here's my somewhat carelessly written code. Change the 

root = ...

line as appropriate before you run it (the root folder must not exist).


from __future__ import with_statement

import os

def makedir(*parts):
    """Join *parts* into a single path, create that directory, return the path.

    Only the final directory is created (``os.mkdir``), so the parent must
    already exist and the directory itself must not; otherwise the error
    raised by ``os.mkdir`` propagates to the caller.
    """
    new_path = os.path.join(*parts)
    os.mkdir(new_path)
    return new_path

def makefile(*parts):
    """Write a text file at the joined *parts* containing its own name.

    The last element of *parts* is used as the file name; the file's
    content is that name on ten consecutive lines. The file is opened in
    "w" mode, so an existing file at that path is overwritten.
    """
    target = os.path.join(*parts)
    with open(target, "w") as handle:
        name_line = parts[-1] + "\n"
        handle.writelines([name_line] * 10)

if __name__ == "__main__":
    # Builds root/Folder<1..5>/Folder<01..10>/file<0001..1000>.txt:
    # 5 * 10 leaf folders with 1000 files each, 50,000 files in total.
    # makedir() calls os.mkdir(), so the root folder must not exist yet.
    root = "./tmp_tree"
    makedir(root)
    for outer_no in range(1, 6):
        outer = "Folder%d" % outer_no
        makedir(root, outer)
        for inner_no in range(1, 11):
            inner = "Folder%02d" % inner_no
            leaf = makedir(root, outer, inner)
            # Each loop level has its own counter so the file loop never
            # overwrites the counter of an enclosing folder loop.
            for file_no in range(1, 1001):
                makefile(leaf, "file%04d.txt" % file_no)

Peter



More information about the Python-list mailing list