http client with proxy example ?

Duncan Booth duncan at rcp.co.uk
Thu Nov 9 06:12:14 EST 2000


Kevin Bailey wrote:
> 
> Hi,
> 
> Could someone point me to or provide a simple example of
> pulling down a webpage using a proxy webserver in Python?
> 
Here is what I use to see what a web server is really sending back:

--------- Begin get.py --------------
import sys,os,regex,urllib,string
import cPickle
from urllib import urlretrieve,urlcleanup,urlopen

def Get(url, HTTP_PROXY=None, ACCEPT_HEADER=None, SHOW=None, **fields):
    proxy = None
    if HTTP_PROXY: proxy = { 'http': HTTP_PROXY }
    d = []
    for key, value in fields.items():
        d.append('%s=%s' % (key, urllib.quote_plus(value)))

    args = string.join(d, '&')
    if args:
        request = url + '?' + args
    else:
        request = url
    print "GET:",request
    try:
        u = urllib.FancyURLopener(proxy)
        if ACCEPT_HEADER:
            if type(ACCEPT_HEADER) != type(''):
                ACCEPT_HEADER = string.join(ACCEPT_HEADER, ',')
            u.addheader('Accept', ACCEPT_HEADER)

        fn, h = u.retrieve(request)
        if 'Head' in SHOW:
            for l in string.split(str(h), '\n'):
                print "HDR:",l
        response = open(fn, "rb").read()
        if 'Body' in SHOW:
            print "RESPONSE has %d bytes" % len(response)
            for l in string.split(response, '\n'):
                print "RESP:",l
        u.cleanup()
    except:
        exc_info = sys.exc_info()
        urlcleanup()
        import pdb, traceback
        sys.last_type, sys.last_value, sys.last_traceback = exc_info
        traceback.print_last()

# Version string shown at the end of the usage text.
VERSION = '1.0'

# Proxy used when no --proxy option is given.
DEFAULT_PROXY = 'http://elastic.rcp.co.uk:3128'

# Long command-line options mapped to their help text.  A trailing '='
# on the option name marks an option that takes an argument (getopt
# long-option convention).
OPTIONS = {
    'proxy=': ('Proxy server, set to blank for no proxy. Default: '
               + DEFAULT_PROXY),
    'html': 'Accept HTML response (default if no other format given)',
    'text': 'Accept plain text',
    'wml': 'Accept WML',
    'wmlc': 'Accept WMLC (compiler WML)',
    'wbmp': 'Accept Wireless bitmap format.',
    'gif': 'Accept GIF files.',
    'accept=': 'add mime type to accept header.',
    'header': 'Show header lines',
    'body': 'Show body',
}

def Usage():
    usage = ['Usage: %s [options] url']
    opts = OPTIONS.items()
    opts.sort()
    for opt, desc in opts:
        if opt[-1]=='=': opt = opt[:-1] + ' arg'
        if len(opt) >= 6: sep = '\n\t'
        else: sep = '\t'
        usage.append('--%s%s%s' % (opt, sep, desc))
    usage.extend(['Version ' + VERSION + ', (c) 2000 RCP Consultants Ltd. 
All rights reserved'])
    print string.join(usage, '\n') % sys.argv[0]
    
def FetchUrl(argv):
    """Command-line driver: parse `argv` and fetch one URL with Get().

    argv -- full argument vector; argv[0] is skipped and argv[1:] is
            parsed with getopt against the long options in OPTIONS plus
            the short option '-?'.

    Prints the usage text and exits with status 2 on an unknown option,
    on '-?', or when the arguments do not name exactly one URL.  With no
    format option the Accept header defaults to 'text/html'; with neither
    --header nor --body, both the headers and the body are shown.
    """
    import getopt

    # Options that simply add one fixed MIME type to the Accept header.
    mime_types = {
        '--html': 'text/html',
        '--text': 'text/plain',
        '--wml': 'text/vnd.wap.wml',
        '--wmlc': 'application/vnd.wap.wmlc',
        '--wbmp': 'image/vnd.wap.wbmp',
        '--gif': 'image/gif',
    }

    # Use the shared constant so the help text and the real default
    # cannot drift apart.
    proxy = DEFAULT_PROXY
    accept = []
    show = []
    try:
        opts, args = getopt.getopt(argv[1:], "?", OPTIONS.keys())
    except getopt.error, errmsg:
        print errmsg
        Usage()
        sys.exit(2)

    for o, v in opts:
        if o == '-?':
            Usage()
            sys.exit(2)
        elif o == '--proxy':
            proxy = v
        elif o == '--accept':
            accept.append(v)
        elif o == '--header':
            show.append('Head')
        elif o == '--body':
            show.append('Body')
        elif mime_types.has_key(o):
            accept.append(mime_types[o])

    if not accept:
        accept.append('text/html')
    if not show:
        show = ['Head', 'Body']
    print "Proxy is %s" % proxy
    if len(args) != 1:
        Usage()
        sys.exit(2)
    Get(url=args[0], HTTP_PROXY=proxy, ACCEPT_HEADER=accept, SHOW=show)

# Run the command-line driver only when executed as a script.
if __name__ == '__main__':
    FetchUrl(sys.argv)

--------- End get.py ----------------



More information about the Python-list mailing list