Query on queue (thread-safe) multithreading

Jaiprakash Singh jaiprakash at wisepromo.com
Tue Mar 11 01:52:32 EDT 2014


Hi, I am working on scraping a site, so I am using multi-threading.
I wrote code based on a (thread-safe) queue, but it still blocks after some time. I have searched a lot but have been unable to resolve it. Please help, I am stuck here.

My code is below:

+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import subprocess
import multiprocessing
import logging
from scrapy import cmdline
import time

# Emit every record from DEBUG upwards; each line shows the level name and
# the name of the thread that logged it.
logging.basicConfig(
    format='[%(levelname)s] (%(threadName)-10s) %(message)s',
    level=logging.DEBUG,
)

# How many worker processes main2() starts.
num_fetch_threads = 150

# Work queue shared by main2() (producer) and the main3() workers; it is
# joinable so the producer can wait until every item has been acknowledged.
enclosure_queue = multiprocessing.JoinableQueue()



def main3(i, q):
    """Worker loop: run the Flipkart spider once for every path read from *q*.

    Paths are taken from *q* until a ``None`` sentinel arrives.  Every item,
    the sentinel included, is acknowledged with ``q.task_done()`` so that a
    ``q.join()`` in the producer can return.

    A failing spider run never terminates the worker: the failure is logged
    and the worker moves on to the next path.  ``KeyboardInterrupt`` is not
    intercepted, so the worker can still be stopped by hand.

    Args:
        i: Worker index; the worker pauses ``i + 2`` seconds after each path.
        q: Queue-like object offering ``get()`` and ``task_done()`` (this
            script passes a ``multiprocessing.JoinableQueue``).
    """
    for pth in iter(q.get, None):
        try:
            try:
                cmdline.execute(['scrapy',  'runspider',   'page3_second_scrapy_flipkart.py',  '-a',  'pth=%s' %(pth)])
                print(pth)
            except SystemExit as exc:
                # NOTE(review): scrapy.cmdline.execute appears to finish by
                # calling sys.exit(); the former bare ``except`` swallowed
                # that, so the path was never reported.  A falsy exit code is
                # treated as success here -- confirm against Scrapy's docs.
                if exc.code:
                    logging.warning('scrapy exited with status %r for %s',
                                    exc.code, pth)
                else:
                    print(pth)
            except Exception:
                # Keep the worker alive, but make the failure visible.
                # NOTE(review): a second crawl in the same process may fail
                # because the Twisted reactor cannot be restarted -- if these
                # log lines show that, launch each crawl in its own process.
                logging.exception('scrapy run failed for %s', pth)

            time.sleep(i + 2)
        finally:
            # Always acknowledge the item, otherwise the producer's
            # q.join() would wait forever.
            q.task_done()

    # Acknowledge the None sentinel that ended the loop.
    q.task_done()




def main2(output):
    """Start the worker processes, feed them every path and wait for the end.

    Args:
        output: Iterable of paths; each one is put on ``enclosure_queue``.

    The function starts ``num_fetch_threads`` processes running ``main3``,
    enqueues the paths, blocks until all of them have been acknowledged,
    then sends one ``None`` sentinel per worker and waits for the workers
    to terminate.
    """
    workers = []
    for idx in range(num_fetch_threads):
        worker = multiprocessing.Process(target=main3,
                                         args=(idx, enclosure_queue))
        workers.append(worker)
        worker.start()

    for path in output:
        enclosure_queue.put(path)

    print('*** Main thread waiting')
    # Returns once every queued path has been marked done by a worker.
    enclosure_queue.join()
    print('*** Done')

    # One sentinel per worker so that each main3() loop ends.
    for _ in workers:
        enclosure_queue.put(None)

    # The sentinels count as queue items too; wait for their acknowledgement.
    enclosure_queue.join()

    for worker in workers:
        worker.join()


 



More information about the Python-list mailing list