proxy testing code with multi-thread

shore.cloud at gmail.com shore.cloud at gmail.com
Fri Feb 1 13:27:16 EST 2008


hi all
I'm now meeting some problem when using threading to test whether or
not a proxy is good or not
and the total number of proxies is just 60
but each time my programme stopped just at approaching the end,
say,
for each proxy I made a proxy
but it just stopped working at maybe 58 or 59 or some other similar
number
below is the code
any advice will be greatly appreciated!
-------------------------------------------------------------------------------------------------------------------------------
import urllib2
import threading
import time
import traceback
import sys

class mythread(threading.Thread):
    def __init__(self, agent, i):
        self.agent = agent
        self.index = i
        threading.Thread.__init__(self)

    def run(self):
        tryagent(self.agent, self.index)

def tryagent(agent, i):
    global mutex, bad, goodlist, total
    proxy_handler = urllib2.ProxyHandler({'http':'http://' + agent})
    opener = urllib2.build_opener(proxy_handler)
    try:
        response = opener.open('http://www.indeed.com/q-Director-Of-
Finance-jobs.html')
        data_web = response.read()
        if data_web.find('All director finance job') == -1:
            #mutex.acquire( )
            bad = bad + 1
            total = total + 1
            print 'agent %d %s is bad in try' %(i, agent,)
            print 'bad number: %d total:%d' %(bad,total)
            #mutex.release( )

        else:
            #mutex.acquire( )
            total = total + 1
            print 'agent %d %s is good' %(i, agent)
            goodlist.append(agent)
            #mutex.release( )
    except:
       # mutex.release( )
        #mutex.acquire( )
        traceback.print_exc()
        total = total + 1
        bad = bad + 1
        print 'agent %d %s is bad in except' %(i, agent,)
        print 'bad number: %d total: %d' %(bad,total)
        #mutex.release( )


agentlist = open('F:\\Python25\\works\\agent.txt',
'r').read().split('\n')
mutex = threading.Lock()
total = 0
bad = 0
goodlist = []
threads = []
print 'total proxy number:%d' %(len(agentlist))

if len(agentlist) <= 100:
    for i, agent in enumerate(agentlist):
        threads.append(mythread(agent, i))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
else:
    i = 1
    while i * 100 < len(agentlist):
        i = i + 1
        threads = []
        for j in range(100):
            threads.append(agentlist[j + 100 * i])
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    i = i - 1
    threads = []
    for k in range(len(agentlist) - 100 * i):
        threads.append(agentlist[100 * i + k])
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()


urllib2.socket.setdefaulttimeout(1)
newproxysock = open('F:\\Python25\\works\\proxy.txt', 'w')
saveout = sys.stdout
sys.stdout = newproxysock
for i, proxy in enumerate(goodlist):
    print proxy
sys.stdout = saveout
newproxysock.close()




More information about the Python-list mailing list