Python Threads

Sick Monkey sickcodemonkey at gmail.com
Thu Feb 22 21:58:46 EST 2007


I think that I found a solution to my thread issues, however I know it is
not the most efficient method possible.
Just to give you a little information on what this project is all about....
   I have 3 lists of email addresses.
      (1)  "host email address" =  contains a list of all of my email
addresses (around 150,000 users)
      (2)  "email addresses"  =  contains a list of email addresses that I
have to match with the file "host email address".  If there are any matches,
then I have to print them out to a file. (this could be up to 8 million
users)
      (3)  "domain addresses" =  contains a list of email domains that I
have to match with the "host email address" file.  If there are any matches,
then I have to print them out to a file. (could be 2000 or more domains)

  When running the application, you will have the "host email address" and
can have either one or both of the other files running at the same time.

My problem was that when the application ran, it appeared to stall.  I
decided to use threads for (1) the processing of data and (2) the progress
bar.  The solution I found that enabled the two threads to communicate was
the use of global variables.

I know this is not the most efficient method but, using this
solution, I do not see the stalling issue that I found before (which is a
good thing).  I am still not happy with it, because I know it is not
efficient, but I found this to be the best solution for my needs.

Thoughts?

The code is below.  Before you see the code, I must thank everyone who
helped me with this project (including the open source coders).
===================
#! /usr/bin/env python
import difflib
import sys
import thread
import re
import os
import time
import Tkinter
from Tkinter import *
from sets import Set
import tkFileDialog
import tkMessageBox
from tkFileDialog import *
# Paths picked through the three file dialogs, by index:
# [0] host email list, [1] email list, [2] domain list ('' = nothing chosen yet).
listName = ['','','']
# Completion flag shared between threads: 0 while work is running, set to 1 when done.
threadStat = 0
# Pattern used with findall() to pull e-mail addresses out of a line of text.
mailsrch = re.compile(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')
# Captures whatever follows an '@' up to the next whitespace (the domain part).
domsrch = re.compile(r"@(\S+)")
statusVar = 0.0  # for the progress bar
# NOTE(review): not referenced anywhere else in the visible code.
startProgress = 0
################################################################
class Meter(Tkinter.Frame):
    '''A progress-bar widget: a frame holding a canvas with a filled
    rectangle (the bar) and a centered text item (the label).

    The fill level is a fraction clamped to the range 0.0 .. 1.0.
    '''

    def __init__(self, master, width=300, height=20, bg='black',
                 fillcolor='cyan', value=0.0, text=None, font=None,
                 textcolor='white', *args, **kw):
        '''Create the frame, its canvas, the bar rectangle and the label.

        master    -- parent widget
        width     -- requested width of the frame
        height    -- requested height of the frame
        bg        -- background color of frame and canvas
        fillcolor -- color of the bar
        value     -- initial fill fraction
        text      -- initial label; None selects the percentage string
        font      -- optional font for the label
        textcolor -- color of the label
        Remaining positional/keyword arguments go to Tkinter.Frame.
        '''
        Tkinter.Frame.__init__(self, master, bg=bg, width=width,
                               height=height, *args, **kw)
        self._value = value

        self._canv = Tkinter.Canvas(self, bg=self['bg'],
                                    width=self['width'],
                                    height=self['height'],
                                    highlightthickness=0, relief='flat',
                                    bd=0)
        self._canv.pack(fill='both', expand=1)
        # The bar starts with zero width; set() stretches it.
        self._rect = self._canv.create_rectangle(
            0, 0, 0, self._canv.winfo_reqheight(), fill=fillcolor, width=0)
        self._text = self._canv.create_text(
            self._canv.winfo_reqwidth()/2, self._canv.winfo_reqheight()/2,
            text='', fill=textcolor)
        if font:
            self._canv.itemconfigure(self._text, font=font)

        self.set(value, text)
        # Re-layout bar and label whenever the widget is resized.
        self.bind('<Configure>', self._update_coords)

    def _update_coords(self, event):
        '''Updates the position of the text and rectangle inside the canvas
        when the size of the widget gets changed.'''
        self._canv.update_idletasks()
        self._canv.coords(self._text, self._canv.winfo_width()/2,
                          self._canv.winfo_height()/2)
        self._canv.coords(self._rect, 0, 0,
                          self._canv.winfo_width()*self._value,
                          self._canv.winfo_height())
        self._canv.update_idletasks()

    def get(self):
        '''Return a (value, text) tuple: the current fill fraction and
        the label currently shown on the canvas.'''
        return self._value, self._canv.itemcget(self._text, 'text')

    def set(self, value=0.0, text=None):
        '''Set the fill fraction and the label.

        value -- fill fraction; clamped to 0.0 .. 1.0
        text  -- label to display; None shows the rounded percentage
        '''
        #make the value failsafe:
        if value < 0.0:
            value = 0.0
        elif value > 1.0:
            value = 1.0
        self._value = value
        if text is None:
            #if no text is specified use the default percentage string:
            text = str(int(round(100 * value))) + ' %'
        self._canv.coords(self._rect, 0, 0, self._canv.winfo_width()*value,
                          self._canv.winfo_height())
        self._canv.itemconfigure(self._text, text=text)
        self._canv.update_idletasks()


##########################################################
def fail(msg):
    """Report msg on stderr and return 0.

    The message is followed by a blank line and then by the module
    docstring, when the module has one.

    msg -- text describing the failure
    Returns 0 so callers can hand the result back as a false value.
    """
    out = sys.stderr.write
    out(msg + "\n\n")
    # __doc__ is None when the module has no docstring, and
    # sys.stderr.write(None) raises TypeError.
    if __doc__:
        out(__doc__)
    return 0
################################################################
def fopen(fname):
    try:
        return open(fname, 'U')
    except IOError, detail:
        return fail("couldn't open " + fname + ": " + str(detail))
################################################################
def fetchFiles(file1,file2,file3): #file1: host list file2 = email list;
file3=domain; method=    method = ''
   print file1
   print file2
   print file3
   f1 = fopen(file1)
   a = f1.readlines(); f1.close()
   d1 = {}
   for c in a:
      for m in mailsrch.findall(c):
         d1[m.lower()] = None
   print "starting list 2"
   thread.start_new_thread(showProcessing, ())
   #DOMAIN COMPARISON    if file2 == '':
      domain(d1,file3)
   #EMAIL COMPARISON    elif file3 == '':
      email(d1,file2)
   #BOTH    else:
      both(d1,file2,file3)
############################################################### def domain
(d1,file3):
   f3 = fopen(file3)
   domains = f3.readlines(); f3.close()
   print len(domains)
   totalLen = len(domains)
   print totalLen
   try:
        progressInc = abs(1.0/totalLen)
   except:
        tkMessageBox.showerror (
            "What are you doing?",
            "One of your files had no information.  I cannot process this, I
QUIT."
        )
        global threadStat
        threadStat = 1
        progressInc = 1

   print progressInc
   global statusVar

   utp = open("data/emailMatch.txt","w")
   domainList = []
   for domain in domains:
     domainList.extend(domsrch.findall(domain.lower()))
   domainsSet = set(domainList)
   for key in d1:
      name, domain = key.split("@",1)
      if domain.lower() in domainsSet:
         utp.write(key + '\n')
      statusVar += progressInc
   utp.close()
   endProc()
###############################################################
def email (d1, file2):
   f2 = fopen(file2)
   method = 'email'
   emails = f2.readlines(); f2.close()

   totalLen = len(emails)
   print totalLen
   try:
        progressInc = abs(1.0/totalLen)
   except:
        tkMessageBox.showerror (
            "What are you doing?",
            "One of your files had no information.  I cannot process this, I
QUIT."
        )
        global threadStat
        threadStat = 1
        progressInc = 1

   print progressInc
   global statusVar
   utp = open("data/emailMatch.txt","w")
   for email in emails:
      for n in mailsrch.findall(email.lower()):
         if d1.has_key( n ):
            utp.write(n + '\n')
      statusVar += progressInc
   utp.close()
   print "I am done with email comparison"
   endProc()
###############################################################
def both (d1, file2, file3):
   #doing the Domains first    f3 = fopen(file3)
   domains = f3.readlines(); f3.close()

   f2 = fopen(file2)
   method = 'email'
   emails = f2.readlines(); f2.close()

   totalLen = len(domains) + len(emails)
   print totalLen
   try:
        progressInc = abs(1.0/totalLen)
   except:
        tkMessageBox.showerror (
            "What are you doing?",
            "One of your files had no information.  I cannot process this, I
QUIT."
        )
        global threadStat
        threadStat = 1
        progressInc = 1
   print progressInc
   global statusVar

   finList = []
   domainList = []
   for domain in domains:
     domainList.extend(domsrch.findall(domain.lower()))
   domainsSet = set(domainList)
   for key in d1:
      name, domain = key.split("@",1)
      if domain.lower() in domainsSet:
         finList.append(key)
      statusVar += progressInc
   print "I am done with domain comparison"
   #Next do email addresses       for email in emails:
      for n in mailsrch.findall(email.lower()):
         if d1.has_key( n ):
             finList.append(n)
      statusVar += progressInc
   print "I am done with email comparison"
   print "removing duplication"
   #removeDups(finList)
   dupFreeList = removeDups(finList)
   dupFreeList.sort()

   utp = open("data/emailMatch.txt","w")
   for emails in dupFreeList:
      utp.write(emails + '\n')
   utp.close()
   print "i am done doing both"
   endProc()

###############################################################
def removeDups(s):
    """Return a list of the distinct elements of the sequence s.

    Three strategies are tried in turn:
      1. dict keys, when every element is hashable (result order is
         whatever the dict yields);
      2. sort and squeeze out neighbours that compare equal, when the
         elements can be ordered (result is sorted);
      3. a brute-force membership scan (result keeps first-seen order).

    s -- a sized iterable (len(s) is taken)
    """
    n = len(s)
    if n == 0:
        return []

    # Fastest: hashable elements de-duplicate through dict keys.
    u = {}
    try:
        for x in s:
            u[x] = 1
    except TypeError:
        pass  # unhashable element; move on to the next method
    else:
        return list(u)

    # Next: sort, then copy each element that differs from its predecessor
    # down over the duplicates.
    try:
        t = sorted(s)
    except TypeError:
        pass  # elements cannot be ordered; move on to the next method
    else:
        last = t[0]
        lasti = i = 1
        while i < n:
            if t[i] != last:
                t[lasti] = last = t[i]
                lasti += 1
            i += 1
        return t[:lasti]

    # Brute force is all that's left.
    u = []
    for x in s:
        if x not in u:
            u.append(x)
    return u

###############################################################
def endProc():
    """Mark the work as finished and terminate the calling thread.

    Sets the module-level threadStat flag to 1, then calls thread.exit().
    """
    global threadStat
    threadStat = 1
    thread.exit()
###############################################################
def showProcessing():
   mroot = Tkinter.Tk(className='Worker Bee')    metric = Meter(mroot,
relief='ridge', bd=3)    metric.pack(fill='x')
   setInc = 0.1
   global statusVar
   global threadStat

   while threadStat == 0:
      if statusVar < 0.3:
        message = "YAWN.  Have any coffee"
      elif statusVar < 0.5 and statusVar > 0.3:
        message = "Im working, so you dont have to."
      elif statusVar < 0.7 and statusVar > 0.5:
        message = "I hope you sold something, to pay me off"
      else:
        message = "Almost there chief."
      metric.set(statusVar, message)
      time.sleep(10)
   metric.set(1.0, 'WOOT WOOT WOOT.  DONE')
   print threadStat
###############################################################

def startProc():
    """Button callback: validate the chosen files and run the comparison.

    Reads the three paths from listName.  If the host list is missing, or
    neither an e-mail list nor a domain list was chosen, an error box is
    shown.  Otherwise fetchFiles() is started in a new thread and this
    function waits until threadStat becomes non-zero.
    """
    global threadStat, statusVar

    f1name = listName[0]
    f2name = listName[1]
    f3name = listName[2]
    if f1name == '':
        tkMessageBox.showerror (
            "Open file",
            "You must upload host email list."
        )
        print "ERROR!  You need to upload host email address"
    elif f2name == '' and f3name == '':
        tkMessageBox.showerror (
            "Open file",
            "You must upload another document to compare host list."
        )
        print "ERROR!  You need to upload another file"
    else:
        # Reset the shared state; otherwise a second run would see the
        # previous run's "done" flag and finished progress value.
        threadStat = 0
        statusVar = 0.0
        thread.start_new_thread(fetchFiles, (f1name,f2name,f3name,))
        # Sleep between polls rather than spinning, so the worker thread
        # is not competing with a busy loop for the interpreter.
        while threadStat == 0:
            time.sleep(0.1)

###############################################################
def openMax():
    """Ask for the host e-mail list file and store its path in listName[0]."""
    listName[0] = tkFileDialog.askopenfilename()

def openEmail():
    """Ask for the e-mail list file and store its path in listName[1]."""
    listName[1] = tkFileDialog.askopenfilename()

def openDomain():
    """Ask for the domain list file and store its path in listName[2]."""
    listName[2] = tkFileDialog.askopenfilename()

###############################################################

# Build the main window: a frame with the three file-picker buttons, plus a
# separate start button.
main = Tk()
bframe = Frame(main)
main.title("Suppression Utility")

# The three pickers live inside bframe; each stores its chosen path in listName.
b1 = Button(bframe,text='Host Email List',command=openMax)
b2 = Button(bframe,text='Email List',command=openEmail)
b3 = Button(bframe,text='Domain List',command=openDomain)
# No master is given for the start button, so it is not placed inside bframe.
b4 = Button(text='Start Processing',command=startProc)

# Pack order matters: b3 is packed to the right before b2, so b3 ends up
# rightmost and b2 sits between b1 and b3.
bframe.pack(side=TOP)
b1.pack(side=LEFT)
b3.pack(side=RIGHT)
b2.pack(side=RIGHT)
b4.pack(side=BOTTOM)

# Hand control to Tk's event loop.
main.mainloop()
#######################################

========================
Dave Huggins


On 2/18/07, Gabriel Genellina <gagsl-py at yahoo.com.ar> wrote:
>
> En Sun, 18 Feb 2007 23:37:02 -0300, Sick Monkey <sickcodemonkey at gmail.com>
> escribió:
>
> > Well if this cannot be done, can a thread call a function in the main
> > method?
> > I have been trying and have not been successful.  Perhaps I am using
> > thread
> > incorrectly.
>
> The safe way to pass information between threads is to use Queue. From
> inside the working thread, you put() an item with enough state
> information. On the main (GUI) thread, you use after() to check for any
> data in the queue, and then update the interface accordingly.
> I think there is a recipe in the Python Cookbook
> http://aspn.activestate.com/ASPN/Cookbook/Python
>
> --
> Gabriel Genellina
>
> --
> http://mail.python.org/mailman/listinfo/python-list
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20070222/add62248/attachment.html>


More information about the Python-list mailing list