Help beautify ugly heuristic code

Stuart D. Gathman stuart at bmsi.com
Wed Dec 8 16:09:43 EST 2004


I have a function that recognizes PTR records for dynamic IPs.  There is
no hard and fast rule for this - every ISP does it differently, and may
change their policy at any time, and use different conventions in
different places.  Nevertheless, it is useful to apply stricter
authentication standards to incoming email when the PTR for the IP
indicates a dynamic IP (namely, the PTR record is ignored since it doesn't
mean anything except to the ISP).  This is because Windoze Zombies are the
favorite platform of spammers.

Here is the very ugly code so far.  It offends me to look at it, but I
haven't had any better ideas.  I have lots of test data from mail logs.

# examples we don't yet recognize:
#
# 1Cust65.tnt4.atl4.da.uu.net at ('67.192.40.65', 4588)
# 1Cust200.tnt8.bne1.da.uu.net at ('203.61.67.200', 4144)
# 1Cust141.tnt30.rtm1.nld.da.uu.net at ('213.116.154.141', 2036)
# user64.net2045.mo.sprint-hsd.net at ('67.77.185.64', 3901)
# wiley-268-8196.roadrunner.nf.net at ('205.251.174.46', 4810)
# 221.fib163.satnet.net at ('200.69.163.221', 3301)
# cpc2-ches1-4-0-cust8.lutn.cable.ntl.com at ('80.4.105.8', 61099)
# user239.res.openband.net at ('65.246.82.239', 1392)
# xdsl-2449.zgora.dialog.net.pl at ('81.168.237.145', 1238)
# spr1-runc1-4-0-cust25.bagu.broadband.ntl.com at ('80.5.10.25', 1684)
# user-0c6s7hv.cable.mindspring.com at ('24.110.30.63', 3720)
# user-0c8hvet.cable.mindspring.com at ('24.136.253.221', 4529)
# user-0cdf5j8.cable.mindspring.com at ('24.215.150.104', 3783)
# mmds-dhcp-11-143.plateautel.net at ('63.99.131.143', 4858)
# ca-santaanahub-cuda3-c6b-134.anhmca.adelphia.net at ('68.67.152.134', 62047)
# cbl-sd-02-79.aster.com.do at ('200.88.62.79', 4153)
# h105n6c2o912.bredband.skanova.com at ('213.67.33.105', 3259)

import re

# Three runs of 1-3 digits separated by '.', 'x' or '-': a candidate for
# three consecutive octets of the IP embedded in the hostname.
ip3 = re.compile(r'([0-9]{1,3})[.x-]([0-9]{1,3})[.x-]([0-9]{1,3})')

# Hostname shapes treated as dynamic without looking at the address:
#   h + 12 hex digits + '.'            (presumably a MAC-derived name)
#   pcp + 6..10 digits + 'pcs.'
#   the literal text 'no-reverse'
#   S + 16 hex digits + '.' + two letters + '.'
rehmac = re.compile(
 r'h[0-9a-f]{12}[.]|pcp[0-9]{6,10}pcs[.]|no-reverse|S[0-9a-f]{16}[.][a-z]{2}[.]'
)

def is_dynip(host,addr):
  """Return True if hostname is for a dynamic ip.

  host is the PTR name of the connecting client (or a bracketed address
  literal such as '[1.2.3.4]' when there is no PTR); addr is the
  client's IPv4 address in dotted-quad form.  The test is heuristic: it
  looks for pieces of addr encoded in host in a number of ISP-specific
  ways (plain, reordered, zero padded, concatenated, hexadecimal) and for
  a few fixed hostname shapes.

  If addr is empty, only the bracketed-literal test is applied.  If addr
  is not four dot-separated integers (for instance an IPv6 address), the
  octet-based heuristics are skipped and only the address-independent
  hostname patterns are tried, instead of raising an exception.

  Examples:

  >>> is_dynip('post3.fabulousdealz.com','69.60.99.112')
  False
  >>> is_dynip('adsl-69-208-201-177.dsl.emhril.ameritech.net','69.208.201.177')
  True
  >>> is_dynip('[1.2.3.4]','1.2.3.4')
  True
  """
  # A bracketed address literal means there was no usable PTR at all.
  if host.startswith('[') and host.endswith(']'):
    return True
  if not addr:
    return False
  if addr in host:
    return True

  a = addr.split('.')
  try:
    # Build real lists: the octets are sliced and compared to lists below,
    # which a lazy map object (Python 3) does not support.
    ia = [int(n) for n in a]
  except ValueError:
    ia = []
  if len(ia) != 4:
    # Not a dotted quad: none of the octet heuristics can apply.
    return rehmac.search(host) is not None

  m = ip3.search(host)
  if m:
    g = [int(n) for n in m.groups()]
    # Three embedded numbers equal to the last three or first three octets.
    if g == ia[1:] or g == ia[:3]: return True
    # Last octet first, followed by the first two octets.
    if g[0] == ia[3] and g[1:] == ia[:2]: return True
    # Same comparison with the embedded numbers in reverse order.
    g.reverse()
    if g == ia[1:] or g == ia[:3]: return True

  if rehmac.search(host): return True

  z = [n.zfill(3) for n in a]   # octets zero padded to three digits
  r = a[::-1]                   # octets in reverse order
  # Every entry is a substring whose presence in host marks it dynamic.
  # The order is irrelevant because any single hit is sufficient.
  needles = (
    "%s." % '-'.join(a[2:]),
    "w%s." % '-'.join(a[:2]),
    "dsl%s-" % '-'.join(a[:2]),
    ''.join(a[:3]),
    ''.join(a[1:]),
    '-'.join(z),
    "-%s." % '-'.join(z[2:]),
    "%s." % ''.join(z[2:]),
    ''.join(z),
    "%s." % '-'.join(r[:2]),
    "%s." % '.'.join(r[:2]),
  )
  if any(n in host for n in needles): return True

  # Whole address as eight hex digits, matched case-insensitively.
  if ("%02x%02x%02x%02x" % tuple(ia)) in host.lower(): return True

  # NOTE(review): the original condition was written "A and B or C" with no
  # parentheses, which Python groups as "(A and B) or C".  That grouping is
  # kept here, so any host containing '.dial-up.' is reported as dynamic
  # regardless of the last octet.  Confirm whether "A and (B or C)" was
  # intended before changing it.
  last_octet_seen = ("%s." % r[0]) in host
  return (last_octet_seen and host.find('.adsl.') > 0) \
    or host.find('.dial-up.') > 0

if __name__ == '__main__':
  # Command-line filter for trying the heuristic on log extracts.  Input
  # comes from the files named on the command line, or standard input if
  # none are given.  Only lines with exactly two whitespace-separated
  # fields, "ip host", are considered; each pair classified as dynamic is
  # echoed to standard output.
  import fileinput
  for ln in fileinput.input():
    a = ln.split()
    if len(a) == 2:
      ip,host = a
      if host.startswith('[') and host.endswith(']'):
        continue        # no PTR
      if is_dynip(host,ip):
        # One pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s" % (ip, host))



More information about the Python-list mailing list