multiprocessing vs thread performance

mk mrkafk at gmail.com
Mon Dec 29 09:52:07 EST 2008


Hello everyone,

After reading http://www.python.org/dev/peps/pep-0371/ I was under the
impression that the performance of the multiprocessing package is similar
to that of thread / threading. However, to familiarize myself with both
packages, I wrote my own test that spawns and returns from 100,000 empty
threads or processes, respectively (while keeping at most 100 threads /
processes active at any one time).

The results I got are very different from the benchmark quoted in PEP
371. On a twin-Xeon machine the threaded version executed in 5.54 secs,
while the multiprocessing version took over 222 secs to complete!

Am I doing something wrong in the code below? Or do I have to use
multiprocessing.Pool to get any decent results?

# multithreaded version


#!/usr/local/python2.6/bin/python

import thread
import time

class TCalc(object):
    """Benchmark harness that starts many short-lived threads.

    Every worker appends its id to ``reslist`` and decrements the
    in-flight counter ``tactivnum``; the spawning loop polls that counter
    to limit how many workers are active at once.
    """

    def __init__(self):
        self.tactivnum = 0  # workers started but not yet finished
        self.reslist = []  # ids appended by the workers as they finish
        self.tid = 0  # not read by any method of this class
        self.tlock = thread.allocate_lock()  # held while updating the two above

    def testth(self, tid):
        """Worker body: record ``tid`` and mark this worker as finished.

        Prints a progress line for every 1000th id.
        """
        if tid % 1000 == 0:
            # Single-argument parenthesised form prints the same text on
            # Python 2.6 and is also valid Python 3 syntax.
            print("== Thread %d working ==" % tid)
        # ``with`` guarantees the lock is released even if the body raises.
        with self.tlock:
            self.reslist.append(tid)
            self.tactivnum -= 1

    def calc_100thousand(self, total=100000, max_active=100):
        """Start ``total`` workers with at most ``max_active`` in flight.

        Worker ids run from 1 to ``total`` inclusive. The call returns
        once the in-flight counter has dropped back to zero.

        Args:
            total: number of worker threads to start (default 100000).
            max_active: upper bound on simultaneously active workers
                (default 100).

        Raises:
            ValueError: if ``max_active`` is smaller than 1, which would
                make the throttle loop wait forever.
        """
        if max_active < 1:
            raise ValueError("max_active must be at least 1")
        tid = 1
        while tid <= total:
            # Throttle: the counter is read without the lock here; only
            # updates are performed under the lock.
            while self.tactivnum >= max_active:
                time.sleep(0.01)
            # Count the worker as active *before* starting it, so its own
            # decrement can never run ahead of this increment.
            with self.tlock:
                self.tactivnum += 1
            thread.start_new_thread(self.testth, (tid,))
            tid += 1
        # Wait for the remaining workers to report completion.
        while self.tactivnum > 0:
            time.sleep(0.01)


if __name__ == "__main__":
    # Script entry point: time one full run of the threaded benchmark
    # using wall-clock time.
    tc = TCalc()
    tstart = time.time()
    tc.calc_100thousand()
    tend = time.time()
    # One pre-formatted string instead of a comma-separated print
    # statement: valid on Python 2.6 and 3 alike. The two spaces after the
    # colon reproduce the separator the original statement emitted.
    print("Total time:  %s" % (tend - tstart))



# multiprocessing version

#!/usr/local/python2.6/bin/python

import multiprocessing
import time


def testp(pid):
         if pid % 1000 == 0:
                 print "== Process %d working ==" % pid

def palivelistlen(plist):
    """Reap finished processes from ``plist`` and count the live ones.

    Every entry whose ``is_alive()`` is false is joined and dropped from
    ``plist`` (the list is modified in place); entries that are still
    alive are kept in their original order.

    Args:
        plist: list of objects providing ``is_alive()`` and ``join()``.

    Returns:
        The number of entries that were still alive.
    """
    still_running = []
    for p in plist:
        if p.is_alive():
            still_running.append(p)
        else:
            p.join()
    # Replace the contents only after the scan is complete: removing items
    # from the list while iterating over it skips the element that follows
    # each removal, so live processes went uncounted and dead ones unreaped.
    plist[:] = still_running
    return len(still_running)

def testp_100thousand(total=100000, max_alive=100):
    """Start ``total`` worker processes with at most ``max_alive`` running.

    Worker ids run from 1 to ``total`` inclusive and each process executes
    ``testp(pid)``. Before every start the function polls
    ``palivelistlen`` until it reports fewer than ``max_alive`` live
    processes. After the last start, every process still in the list is
    joined.

    Args:
        total: number of processes to start (default 100000).
        max_alive: upper bound on simultaneously running processes
            (default 100).

    Raises:
        ValueError: if ``max_alive`` is smaller than 1, which would make
            the throttle loop wait forever.
    """
    if max_alive < 1:
        raise ValueError("max_alive must be at least 1")
    proclist = []
    pid = 1
    while pid <= total:
        # Throttle: wait in 10 ms steps until a slot is free.
        while palivelistlen(proclist) >= max_alive:
            time.sleep(0.01)
        p = multiprocessing.Process(target=testp, args=(pid,))
        p.start()
        proclist.append(p)
        pid += 1
    # Single-argument parenthesised form prints the same text on
    # Python 2.6 and is also valid Python 3 syntax.
    print("=== Main thread waiting for all processes to finish ===")
    for p in proclist:
        p.join()

if __name__ == "__main__":
    # Script entry point: time one full run of the multiprocessing
    # benchmark using wall-clock time.
    tstart = time.time()
    testp_100thousand()
    tend = time.time()
    # One pre-formatted string instead of a comma-separated print
    # statement: same output on Python 2.6, and valid on Python 3.
    print("Total time: %s" % (tend - tstart))





More information about the Python-list mailing list