[Spambayes-checkins] SF.net SVN: spambayes:[3198] trunk/spambayes/spambayes/dnscache.py

montanaro at users.sourceforge.net montanaro at users.sourceforge.net
Tue Nov 25 03:05:35 CET 2008


Revision: 3198
          http://spambayes.svn.sourceforge.net/spambayes/?rev=3198&view=rev
Author:   montanaro
Date:     2008-11-25 02:05:34 +0000 (Tue, 25 Nov 2008)

Log Message:
-----------
use safepickle functions
pylint nits

Modified Paths:
--------------
    trunk/spambayes/spambayes/dnscache.py

Modified: trunk/spambayes/spambayes/dnscache.py
===================================================================
--- trunk/spambayes/spambayes/dnscache.py	2008-11-25 02:03:24 UTC (rev 3197)
+++ trunk/spambayes/spambayes/dnscache.py	2008-11-25 02:05:34 UTC (rev 3198)
@@ -6,7 +6,8 @@
 # Version 0.1 2004 06 27
 # Version 0.11 2004 07 06 Fixed zero division error in __del__
 
-import DNS # From http://sourceforge.net/projects/pydns/
+# From http://sourceforge.net/projects/pydns/
+import DNS
 
 import sys
 import os
@@ -14,31 +15,28 @@
 import time
 import types
 import socket
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
 
 from spambayes.Options import options
+from spambayes.safepickle import pickle_read, pickle_write
 
-kCheckForPruneEvery=20
-kMaxTTL=60 * 60 * 24 * 7                # One week
+kCheckForPruneEvery = 20
+kMaxTTL = 60 * 60 * 24 * 7                # One week
 # Some servers always return a TTL of zero.  We'll hold onto data a bit
 # longer.
-kMinTTL=24 * 60 * 60 * 1                # one day
-kPruneThreshold=5000 # May go over slightly; numbers chosen at random
-kPruneDownTo=2500
+kMinTTL = 24 * 60 * 60 * 1                # one day
+kPruneThreshold = 5000 # May go over slightly; numbers chosen at random
+kPruneDownTo = 2500
 
 
 class lookupResult(object):
     #__slots__=("qType","answer","question","expiresAt","lastUsed")
 
-    def __init__(self,qType,answer,question,expiresAt,now):
-        self.qType=qType
-        self.answer=answer
-        self.question=question
-        self.expiresAt=expiresAt
-        self.lastUsed=now
+    def __init__(self, qType, answer, question, expiresAt, now):
+        self.qType = qType
+        self.answer = answer
+        self.question = question
+        self.expiresAt = expiresAt
+        self.lastUsed = now
         return None
 
 
@@ -68,9 +66,9 @@
 
 
 class cache:
-    def __init__(self,dnsServer=None,cachefile=""):
+    def __init__(self, dnsServer=None, cachefile=""):
     # These attributes intended for user setting
-        self.printStatsAtEnd=False
+        self.printStatsAtEnd = False
 
         # As far as I can tell from the standards,
         # it's legal to have more than one PTR record
@@ -83,7 +81,7 @@
         # lookups always return a list. Reverse
         # ("PTR") lookups return a single name unless
         # this attribute is set to False.
-        self.returnSinglePTR=True
+        self.returnSinglePTR = True
 
         # How long to cache an error as no data
         self.cacheErrorSecs=5*60
@@ -98,7 +96,7 @@
 
         if self.cachefile and os.path.exists(self.cachefile):
             try:
-                self.caches = pickle.load(open(self.cachefile, "rb"))
+                self.caches = pickle_read(self.cachefile)
             except:
                 os.unlink(self.cachefile)
 
@@ -118,19 +116,18 @@
         self.misses=0
         self.pruneTicker=0
 
-        if dnsServer==None:
+        if dnsServer == None:
             DNS.DiscoverNameServers()
-            self.queryObj=DNS.DnsRequest()
+            self.queryObj = DNS.DnsRequest()
         else:
-            self.queryObj=DNS.DnsRequest(server=dnsServer)
+            self.queryObj = DNS.DnsRequest(server=dnsServer)
         return None
 
     def close(self):
         if self.printStatsAtEnd:
             self.printStats()
         if self.cachefile:
-            from storage import safe_pickle
-            safe_pickle(self.cachefile, self.caches)
+            pickle_write(self.cachefile, self.caches)
 
     def printStats(self):
         for key,val in self.caches.items():
@@ -139,30 +136,30 @@
                 totAnswers+=len(item)
             print >> sys.stderr, "cache", key, "has", len(self.caches[key]),
             print >> sys.stderr, "question(s) and", totAnswers, "answer(s)"
-        if self.hits+self.misses==0:
+        if self.hits+self.misses == 0:
             print >> sys.stderr, "No queries"
         else:
             print >> sys.stderr, self.hits, "hits,", self.misses, "misses",
             print >> sys.stderr, "(%.1f%% hits)" % \
                   (self.hits/float(self.hits+self.misses)*100)
 
-    def prune(self,now):
+    def prune(self, now):
         # I want this to be as fast as reasonably possible.
         # If I didn't, I'd probably do various things differently
         # Is there a faster way to do this?
-        allAnswers=[]
+        allAnswers = []
         for cache in self.caches.values():
             for val in cache.values():
                 allAnswers += val
 
-        allAnswers=sort_by_attr(allAnswers,"expiresAt")
+        allAnswers = sort_by_attr(allAnswers,"expiresAt")
         allAnswers.reverse()
 
         while True:
-            if allAnswers[-1].expiresAt>now:
+            if allAnswers[-1].expiresAt > now:
                 break
-            answer=allAnswers.pop()
-            c=self.caches[answer.qType]
+            answer = allAnswers.pop()
+            c = self.caches[answer.qType]
             c[answer.question].remove(answer)
             if  not c[answer.question]:
                 del c[answer.question]
@@ -177,12 +174,12 @@
         # some entries least-recently-used-wise. I'm not by any means
         # sure that this is the best strategy, but as yet I don't have
         # data to test different strategies.
-        allAnswers=sort_by_attr(allAnswers,"lastUsed")
+        allAnswers = sort_by_attr(allAnswers, "lastUsed")
         allAnswers.reverse()
-        numToDelete=len(allAnswers)-kPruneDownTo
-        for count in range(numToDelete):
-            answer=allAnswers.pop()
-            c=self.caches[answer.qType]
+        numToDelete = len(allAnswers)-kPruneDownTo
+        for _count in xrange(numToDelete):
+            answer = allAnswers.pop()
+            c = self.caches[answer.qType]
             c[answer.question].remove(answer)
             if not c[answer.question]:
                 del c[answer.question]
@@ -190,86 +187,88 @@
         return None
 
 
-    def formatForReturn(self,listOfObjs):
-        if len(listOfObjs)==1 and listOfObjs[0].answer==None:
+    def formatForReturn(self, listOfObjs):
+        if len(listOfObjs) == 1 and listOfObjs[0].answer == None:
             return []
 
-        if listOfObjs[0].qType=="PTR" and self.returnSinglePTR:
+        if listOfObjs[0].qType == "PTR" and self.returnSinglePTR:
             return listOfObjs[0].answer
 
         return [ obj.answer for obj in listOfObjs ]
 
 
     def lookup(self,question,qType="A"):
-        qType=qType.upper()
+        qType = qType.upper()
         if qType not in ("A","PTR"):
             raise ValueError,"Query type must be one of A, PTR"
 
-        now=int(time.time())
+        now = int(time.time())
 
         # Finding the len() of a dictionary isn't an expensive operation
         # but doing it twice for every lookup isn't necessary.
-        self.pruneTicker+=1
-        if self.pruneTicker==kCheckForPruneEvery:
-            self.pruneTicker=0
+        self.pruneTicker += 1
+        if self.pruneTicker == kCheckForPruneEvery:
+            self.pruneTicker = 0
             if len(self.caches["A"])+len(self.caches["PTR"])>kPruneThreshold:
                 self.prune(now)
 
-        cacheToLookIn=self.caches[qType]
+        cacheToLookIn = self.caches[qType]
 
         try:
-            answers=cacheToLookIn[question]
+            answers = cacheToLookIn[question]
         except KeyError:
             pass
         else:
             if answers:
-                ind=0
+                ind = 0
                 # No guarantee that expire has already been done
                 while ind<len(answers):
-                    thisAnswer=answers[ind]
+                    thisAnswer = answers[ind]
                     if thisAnswer.expiresAt<now:
                         del answers[ind]
                     else:
-                        thisAnswer.lastUsed=now
-                        ind+=1
+                        thisAnswer.lastUsed = now
+                        ind += 1
             else:
                 print >> sys.stderr, "lookup failure:", question
 
             if not answers:
                 del cacheToLookIn[question]
             else:
-                self.hits+=1
+                self.hits += 1
                 return self.formatForReturn(answers)
 
         # Not in cache or we just expired it
-        self.misses+=1
+        self.misses += 1
 
-        if qType=="PTR":
-            qList=question.split(".")
+        if qType == "PTR":
+            qList = question.split(".")
             qList.reverse()
-            queryQuestion=".".join(qList)+".in-addr.arpa"
+            queryQuestion = ".".join(qList)+".in-addr.arpa"
         else:
-            queryQuestion=question
+            queryQuestion = question
 
         # where do we get NXDOMAIN?
         try:
-            reply=self.queryObj.req(queryQuestion,qtype=qType,timeout=self.dnsTimeout)
+            reply = self.queryObj.req(queryQuestion, qtype=qType,
+                                      timeout=self.dnsTimeout)
         except DNS.Base.DNSError,detail:
-            if detail.args[0]<>"Timeout":
+            if detail.args[0] != "Timeout":
                 print >> sys.stderr, "Error, fixme", detail
                 print >> sys.stderr, "Question was", queryQuestion
                 print >> sys.stderr, "Original question was", question
                 print >> sys.stderr, "Type was", qType
-            objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ]
-            cacheToLookIn[question]=objs # Add to format for return?
+            objs = [lookupResult(qType, None, question,
+                                 self.cacheErrorSecs+now, now)]
+            cacheToLookIn[question] = objs # Add to format for return?
             return self.formatForReturn(objs)
         except socket.gaierror,detail:
             print >> sys.stderr, "DNS connection failure:", self.queryObj.ns, detail
             print >> sys.stderr, "Defaults:", DNS.defaults
 
-        objs=[]
+        objs = []
         for answer in reply.answers:
-            if answer["typename"]==qType:
+            if answer["typename"] == qType:
                 # PyDNS returns TTLs as longs but RFC 1035 says that the TTL
                 # value is a signed 32-bit value and must be positive, so it
                 # should be safe to coerce it to a Python integer.  And
@@ -277,22 +276,24 @@
                 # (68 years and change) is drunk.  Arguably, I ought to
                 # impose a maximum rather than continuing with longs
                 # (int(long) returns long in recent versions of Python).
-                ttl=max(min(int(answer["ttl"]),kMaxTTL),kMinTTL)
+                ttl = max(min(int(answer["ttl"]), kMaxTTL), kMinTTL)
                 # RFC 2308 says that you should cache an NXDOMAIN for the
                 # minimum of the minimum field of the SOA record and the TTL
                 # of the SOA.
-                if ttl>0:
-                    item=lookupResult(qType,answer["data"],question,ttl+now,now)
+                if ttl > 0:
+                    item = lookupResult(qType, answer["data"], question,
+                                        ttl+now, now)
                     objs.append(item)
 
         if objs:
-            cacheToLookIn[question]=objs
+            cacheToLookIn[question] = objs
             return self.formatForReturn(objs)
 
         # Probably SERVFAIL or the like
         if not reply.authority:
-            objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ]
-            cacheToLookIn[question]=objs
+            objs = [lookupResult(qType, None, question,
+                                 self.cacheErrorSecs+now, now)]
+            cacheToLookIn[question] = objs
             return self.formatForReturn(objs)
 
 
@@ -303,44 +304,44 @@
         #
         # RFC 2308 specifies that this is how to decide how long to cache an
         # NXDOMAIN.
-        auth=reply.authority[0]
-        auTTL=int(auth["ttl"])
+        auth = reply.authority[0]
+        auTTL = int(auth["ttl"])
         for item in auth["data"]:
-            if type(item)==types.TupleType and item[0]=="minimum":
-                auMin=int(item[1])
-                cacheNeg=min(auMin,auTTL)
+            if type(item) == types.TupleType and item[0] == "minimum":
+                auMin = int(item[1])
+                cacheNeg = min(auMin,auTTL)
                 break
         else:
-            cacheNeg=auTTL
-        objs=[ lookupResult(qType,None,question,cacheNeg+now,now) ]
+            cacheNeg = auTTL
+        objs = [lookupResult(qType, None, question, cacheNeg+now, now)]
 
-        cacheToLookIn[question]=objs
+        cacheToLookIn[question] = objs
         return self.formatForReturn(objs)
 
 
 def main():
     import transaction
-    c=cache(cachefile=os.path.expanduser("~/.dnscache"))
-    c.printStatsAtEnd=True
+    c = cache(cachefile=os.path.expanduser("~/.dnscache"))
+    c.printStatsAtEnd = True
     for host in ["www.python.org", "www.timsbloggers.com",
                  "www.seeputofor.com", "www.completegarbage.tv",
                  "www.tradelinkllc.com"]:
         print >> sys.stderr, "checking", host
-        now=time.time()
-        ips=c.lookup(host)
-        print >> sys.stderr, ips,time.time()-now
-        now=time.time()
-        ips=c.lookup(host)
-        print >> sys.stderr, ips,time.time()-now
+        now = time.time()
+        ips = c.lookup(host)
+        print >> sys.stderr, ips, time.time()-now
+        now = time.time()
+        ips = c.lookup(host)
+        print >> sys.stderr, ips, time.time()-now
 
         if ips:
-            ip=ips[0]
-            now=time.time()
-            name=c.lookup(ip,qType="PTR")
-            print >> sys.stderr, name,time.time()-now
-            now=time.time()
-            name=c.lookup(ip,qType="PTR")
-            print >> sys.stderr, name,time.time()-now
+            ip = ips[0]
+            now = time.time()
+            name = c.lookup(ip, qType="PTR")
+            print >> sys.stderr, name, time.time()-now
+            now = time.time()
+            name = c.lookup(ip, qType="PTR")
+            print >> sys.stderr, name, time.time()-now
         else:
             print >> sys.stderr, "unknown"
 
@@ -348,5 +349,5 @@
 
     return None
 
-if __name__=="__main__":
+if __name__ == "__main__":
     main()


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


More information about the Spambayes-checkins mailing list