Python threading

Robert.R.Emmel at gmail.com Robert.R.Emmel at gmail.com
Thu Aug 30 12:53:01 EDT 2007


Hello,

I am using the threading module and the Queue module in Python to
send out shipment tracking URL requests.

Is there a way to time out a thread within a Queue?

I think the way I have it now the thread will wait until something is
returned and will basically wait forever for that something.

Between the waiting for something to be returned and the timeouts on
the database connections, the program appears to just hang like it has
stopped working. Any suggestions?

Here is a sample of code I am using (left out some of the code for
brevity):

import urllib
from HTMLParser import HTMLParser
import threading
import Queue

NTHREADS = 100

...

def Web_Retrieve(inpque, outqueue):
    N = 1
    try:
        connection = MySQLdb.connect(host="hostname", port=3306,
user="username", passwd="password", db="Mydatabase")
        print "t" + str(N)+ " - Database Connection Established !!"
        cursor = connection.cursor()
    except MySQLdb.OperationalError, message:
        errorMessage = "t" + str(N)+ " -Error %d:\n%s" % (message[0],
message[1])
    else:

        DBRow = inpque.get()
        while not (DBRow[0] == ''):
            PNum = DBRow[1]
            PNum = RemoveHyphens(RemoveSpaces(PNum))
            print "t" + str(N)+ " -PNum : " + PNum
...
                # Setup URL to retrieve status web page depending on
Carrier SCAC
                    if SCAC == "RDWY":
                        pURL = "http://www.quiktrak.roadway.com/cgi-
bin/quiktrak?"
                        bURL = "&pro0=" + PNum

                        if DEBUG > 90 or (DEBUG > 30 and DEBUG < 40):
                            print "t" + str(N)+ " -URL: ", pURL +
bURL
                        WURL = pURL + bURL

                        # Retrieve status web page, assign it to a
variable and close connection
                        try:
                            f = urllib.urlopen(pURL+bURL)
                            s = f.read()
                            f.close()
                        except IOError, e:
                            print 't' + str(N)+ ' -I/O Error:
',e.strerror
 ##                           continue

...

                outqueue.put((PR, s, WURL, s12))
                DBRow = inpque.get()
                N = N+ 1
        outqueue.put(None)
        cursor.close()
        connection.commit()
        connection.close()
        print "t" + str(N)+ " -Database Closed"

## Main processing file.
def WebProcess(Mode):
    ## Get file listing from the proper directory depending on mode
    if Mode == 'B':
        IFilenames = os.listdir(os.curdir+os.sep+'Input')
        NTHREADS = 100
...

    # Open connection to MySql
    try:
        connection = MySQLdb.connect(host="hostname", port=3306,
user="username", passwd="password", db="mydatabase")
        print "Database Connection Established !!"
        cursor = connection.cursor()
    except MySQLdb.OperationalError, message:
        errorMessage = "Error %d:\n%s" % (message[0], message[1])
    else:


        inputs = Queue.Queue(0)
        results = Queue.Queue(0)
        thread_pool = []

        for ii in range(NTHREADS):
            thread = threading.Thread(target=Web_Retrieve,
args=(inputs, results))
            thread.start()
            thread_pool.append(thread)

        ## Retrieve BOL's from Database for web retrieval
        print "Current Mode is : ", Mode
        print "Length of Mode string is : ", len(Mode)
        Mode_results = cursor.execute( "Select * from bol_table where
Mode = %s and (Expired IS NULL or Expired IS False);", (Mode.strip()))
        print "Mode Query Results: ", Mode_results
        if Mode_results > 0:
            print " Do Web Page lookups !!! "
            DBRows = cursor.fetchall()
            for DBRow in DBRows:
                inputs.put(DBRow)

            for ii in range(len(DBRows)):
                PR, s, WURL, s12 = results.get()
##                print "PR is : "+PR+" S's first 100 char's are
"+s[0:100]+" WURL is : "+WURL
                print
"+-------------------------------------------------------------+"
                print "PR is : " + PR
                print "s is (First 100) : " + s[0:100]
                print "WURL is : " + WURL
                print "s12 is : " + s12
                print
"+-------------------------------------------------------------+"

....

## Clear out thread pool

            for thread in thread_pool:
                inputs.put(('',''))

....


For the times I have run into the "<scr" + "ipt>" on web pages I take
the f.read() string s and pass it to this function:

## fix webpages that have the 'scr' + 'ipt' split on them.
def fixscript(x):
    """Rejoin script tags that a page emitted split as 'scr" + "ipt'.

    Some pages write their script tags as a string concatenation
    (for example '<scr" + "ipt>'), which confuses HTML parsers.  Every
    occurrence of the split form is collapsed back to plain 'script'.

    x       -- the page source as a string.
    returns -- a new string with all split tags repaired; the input is
               returned unchanged when it contains no split tags.
    """
    ## str.replace scans the whole string from index 0 and handles
    ## back-to-back occurrences.  The earlier hand-rolled find() loop
    ## started at offset 12 and always skipped 12 characters after a
    ## match, so tags near the start of the page, or close together,
    ## were left unrepaired.
    return x.replace('scr" + "ipt', 'script')

Just passing this along to those who have run into this problem when
parsing a web page.




More information about the Python-list mailing list