Please critique my script
Peter Otten
__peter__ at web.de
Sat Jul 16 04:08:07 EDT 2011
Ellerbee, Edward wrote:
> I've been working on this for 3 weeks as a project while I'm learning
> python. It's ugly, only function in there is from a fellow lister, but
> it works perfectly and does what it is intended to do.
>
> I'd love to hear comments on how I could improve this code, what would
> be good to turn into a function/class etc.
>
> # The function of this script is to gather user data from the user to
> # build appropriate local dial-peers in a cisco voice gateway.
> # Data gathered from the user:
> # name of file to write dial-peers to
> # NPA
> # NXX
> # outbound port to send calls
> # question if home npa local calls are 7 digit
> # script navigates to nanpa.com, gathers the local calling area
> # and returns the data
> # the data is converted to beautiful soup (I had a problem with the xml
> format)
> # the data is parsed for the proper NPA/NXXs
> # list is sorted, duplicates removed
> # data from list is formatted and printed to the file specified
>
>
> #--------Import dependencies-----------
> import urllib2
> from BeautifulSoup import BeautifulSoup
> import re
>
> #--------Define variables------------
> count = 0
> count2 = 0
> count3 = 0
> npalist = []
> nxxlist = []
> sortlist = []
> sortedlist = []
> npaReg = re.compile('(?<=<npa>)(...)(?=</npa>)')
> nxxReg = re.compile('(?<=<nxx>)(...)(?=</nxx>)')
> proxy = urllib2.ProxyHandler({'http': 'proxy.com:8080'})
> # --actual proxy was removed for confidentiality--
> opener = urllib2.build_opener(proxy)
>
> #----- function to concatenate numbers - thanks to Chris Angelico -----
> def combine(list_of_numbers, position):
> lastprefix=tails=lastsuffix=None
> result=[]
> for cur in list_of_numbers:
> prefix=cur[:position]; tail=cur[position];
> suffix=cur[position+1:]
> if prefix!=lastprefix or suffix!=lastsuffix:
> if lastprefix!=None:
> if len(tails)>1:
> result.append("%s[%s]%s"%(lastprefix,tails,lastsuffix))
> else: result.append(lastprefix+tails+lastsuffix)
> lastprefix,tails,lastsuffix=prefix,"",suffix
> tails+=tail
> if lastprefix!=None:
> if len(tails)>1:
> result.append("%s[%s]%s"%(lastprefix,tails,lastsuffix))
> else: result.append(lastprefix+tails+lastsuffix)
> return result
>
>
> #-------Gather info from user--------
> x = raw_input("please enter a filename: ")
> y = raw_input("please enter the npa: ")
> z = raw_input("please enter the nxx: ")
> p = raw_input("please enter the port: ")
> q = raw_input("Is home npa local dialing 7 digit?(y/n) ")
> #print x
>
> #-------Modify user data-------------
> o = open(x, 'w')
> y = str(y)
> z = str(z)
> p = str(p)
> pagedef = ("http://www.localcallingguide.com/xmllocalprefix.php?npa=" +
> y + "&nxx=" + z)
> print "Querying", pagedef
>
> #------Get info from NANPA.com ----------
> urllib2.install_opener(opener)
> page = urllib2.urlopen(pagedef)
> soup = BeautifulSoup(page)
> soup = str(soup)
>
> #------Parse Gathered Data----------
> for line in npaReg.findall(soup):
> npalist.insert(count,line)
> count = count + 1
>
> for line2 in nxxReg.findall(soup):
> nxxlist.insert(count2,line2)
> count2 = count2 + 1
>
> #-----Sort, remove duplicates, concatenate the last digits for similiar
> NPA/NXX ------
> for makenewlist in range(0,count):
> sortlist.append(npalist.pop(0) + nxxlist.pop(0))
>
> sortlist.sort()
>
> for sortednumber in sortlist:
> if sortednumber not in sortedlist:
> sortedlist.append(sortednumber)
>
> catlist = combine(sortedlist,5)
> catlist2 = combine(catlist,4)
>
> #------Print the dial-peers to file----
> for line in catlist2:
> figureDpn = count3 + 1000
> dpn = str(figureDpn)
> label = "dial-peer voice " + dpn
> o.write(label)
> o.write('\n')
> o.write("description *** local outbound dialpeer ***")
> o.write('\n')
> destpatt = "destination-pattern %s...." % line.rstrip()
> o.write(destpatt)
> o.write('\n')
> port = "port " + p
> o.write(port)
> o.write('\n')
> if line[0:3] == y and q == "y":
> o.write("forward-digits 7")
> o.write('\n')
> o.write('\n')
> count3 = count3 + 1
>
> o.close()
- Most important: read or reread the tutorial. It should give you an idea
about the basics like adding items to a list.
- Use self-evident variable names
- Avoid global variables
- Prefer docstrings over comments
- Split your code into small chunks that do one thing well, i. e. functions.
At the top-level your script could look like this
def main():
args = read_arguments()
url = make_url(args.npa, args.nxx)
page = download_page(url)
npa_nxx_pairs = scrape_page(page)
clusters = combine_pairs(npa_nxx_pairs)
with open(args.destination_file) as out:
write_data(out, clusters, args.npa, args.port, args.seven_digit)
if __name__ == "__main__"
main()
This makes dependencies clear, avoids globals, and reduces the need for
comments. You can test separate functionality separately. NB: As I have no
expertise in the problem domain the variable names I used above are a bit
too abstract.
- Convert flags to boolean early. Bad:
seven_digit = raw_input(...)
...
if seven_digit == "y": ...
Better:
seven_digit = raw_input(...) == "y"
...
if seven_digit: ...
- Relentlessly eliminate dead code. Example: page = str(BeautifulSoup(page))
is almost a no-op. You don't need BeautifulSoup at all!
- Don't comment out code, remove it. Rely on version control if you think
that you may need it again later.
- The page you are scraping is xml. Regular expressions are not the best
tool to deal with that. Here's a sample implementation of scrape_page()
using lxml and xpath:
from lxml import etree
def scrape_page(page):
root = etree.fromstring(page)
npas = root.xpath("//prefix/npa/text()")
nxxs = root.xpath("//prefix/nxx/text()")
pairs = (npa + nxx for npa, nxx in zip(npas, nxxs))
return sorted(set(pairs))
An xpath expert might be able to simplify that.
- I prefer to pass script arguments on the commandline rather than answering
a sequence of questions interactively. With that approach get_arguments()
becomes something like
import argparse
def get_arguments():
parser = argparse.ArgumentParser()
parser.add_argument("npa")
parser.add_argument("nxx")
parser.add_argument("destfile", nargs="?")
parser.add_argument("--port", default="1234")
parser.add_argument("-7", "--seven-digit", action="store_true")
return parser.parse_args()
Sadly the instructive help="..." parameters are all missing...
More information about the Python-list
mailing list