HTTP client with proxy example?
Duncan Booth
duncan at rcp.co.uk
Thu Nov 9 06:12:14 EST 2000
Kevin Bailey wrote:
>
> Hi,
>
> Could someone point me to or provide a simple example of
> pulling down a webpage using a proxy webserver in Python ?
>
Here is what I use to see what a web server is really sending back:
--------- Begin get.py --------------
import sys,os,regex,urllib,string
import cPickle
from urllib import urlretrieve,urlcleanup,urlopen
def Get(url, HTTP_PROXY=None, ACCEPT_HEADER=None, SHOW=None, **fields):
    """Fetch `url` with an HTTP GET and print what the server sent back.

    Arguments:
    url           -- address to fetch.
    HTTP_PROXY    -- URL of the proxy to use for http requests.  When it is
                     empty or None, no proxy mapping is handed to urllib.
    ACCEPT_HEADER -- value for the Accept request header: either a string,
                     or a sequence of strings that is joined with commas.
    SHOW          -- sequence naming the sections to print: 'Head' for the
                     response headers, 'Body' for the response body.  None
                     (the default) prints neither section.
    **fields      -- extra keyword arguments; each value is URL-quoted and
                     the name=value pairs are appended to `url` as a query
                     string.

    Nothing is returned.  Errors raised while fetching or printing are not
    propagated: a traceback is written via traceback.print_last() and the
    exception is stored in sys.last_type / sys.last_value /
    sys.last_traceback.  KeyboardInterrupt and SystemExit are re-raised.
    """
    proxy = None
    if HTTP_PROXY:
        proxy = {'http': HTTP_PROXY}
    # NOTE(review): when `proxy` stays None, urllib presumably falls back to
    # the proxy settings found in the environment rather than connecting
    # directly -- confirm against the urllib documentation.

    # The default used to be tested with `in` while still None, which raised
    # TypeError on every default call.
    if SHOW is None:
        SHOW = ()

    params = []
    for key, value in fields.items():
        params.append('%s=%s' % (key, urllib.quote_plus(value)))

    request = url
    if params:
        # Do not add a second '?' when the URL already has a query string.
        if '?' not in url:
            joiner = '?'
        elif url[-1] in '?&':
            joiner = ''
        else:
            joiner = '&'
        request = url + joiner + '&'.join(params)
    print("GET: %s" % request)

    opener = None
    # Two nested try blocks: the inner one reports failures, the outer one
    # guarantees the opener's temporary files are removed on every path.
    try:
        try:
            opener = urllib.FancyURLopener(proxy)
            if ACCEPT_HEADER:
                if not isinstance(ACCEPT_HEADER, type('')):
                    ACCEPT_HEADER = ','.join(ACCEPT_HEADER)
                opener.addheader('Accept', ACCEPT_HEADER)

            filename, headers = opener.retrieve(request)
            if 'Head' in SHOW:
                for line in str(headers).split('\n'):
                    print("HDR: %s" % line)

            body_file = open(filename, "rb")
            try:
                response = body_file.read()
            finally:
                body_file.close()

            if 'Body' in SHOW:
                print("RESPONSE has %d bytes" % len(response))
                for line in response.split('\n'):
                    print("RESP: %s" % line)
        except (KeyboardInterrupt, SystemExit):
            # Let the user (or an explicit exit) abort the program.
            raise
        except:
            # Deliberately broad: this is a diagnostic tool, so any failure
            # is reported rather than propagated.  Recording the exception
            # in sys.last_* is what traceback.print_last() reads from.
            import traceback
            exc_info = sys.exc_info()
            sys.last_type, sys.last_value, sys.last_traceback = exc_info
            traceback.print_last()
    finally:
        if opener is not None:
            opener.cleanup()
# Version string shown on the last line of the usage text.
VERSION = '1.0'

# Proxy URL quoted in the help text of the --proxy option.
DEFAULT_PROXY = 'http://elastic.rcp.co.uk:3128'

# Long command-line options mapped to their help text.  The keys are handed
# to getopt as long option names, so a trailing '=' marks an option that
# takes an argument.
OPTIONS = {
    # Proxy selection.
    'proxy=': ('Proxy server, set to blank for no proxy. Default: '
               + DEFAULT_PROXY),

    # Shortcuts that add one MIME type to the Accept header.
    'html': 'Accept HTML response (default if no other format given)',
    'text': 'Accept plain text',
    'wml': 'Accept WML',
    'wmlc': 'Accept WMLC (compiler WML)',
    'wbmp': 'Accept Wireless bitmap format.',
    'gif': 'Accept GIF files.',

    # Arbitrary MIME type for the Accept header.
    'accept=': 'add mime type to accept header.',

    # Which parts of the response to print.
    'header': 'Show header lines',
    'body': 'Show body',
}
def Usage():
    """Print the command-line help text to standard output.

    The text consists of a usage line naming the program (sys.argv[0]),
    one entry per key of OPTIONS in sorted key order, and a closing
    version/copyright line built from VERSION.
    """
    # Only the header carries a format placeholder, so only it is formatted;
    # option descriptions may therefore safely contain '%' characters.
    lines = ['Usage: %s [options] url' % sys.argv[0]]

    entries = list(OPTIONS.items())
    entries.sort()
    for opt, desc in entries:
        # A trailing '=' marks an option that takes an argument.
        if opt[-1:] == '=':
            opt = opt[:-1] + ' arg'
        # Longer option names push their description onto the next line.
        if len(opt) >= 6:
            sep = '\n\t'
        else:
            sep = '\t'
        lines.append('--%s%s%s' % (opt, sep, desc))

    lines.append('Version ' + VERSION
                 + ', (c) 2000 RCP Consultants Ltd. All rights reserved')
    print('\n'.join(lines))
def FetchUrl(argv):
    """Parse command-line arguments and fetch the requested URL with Get().

    argv -- the full argument vector; argv[0] is skipped and the rest is
            parsed with getopt using the long options listed in OPTIONS
            plus the short option -?.

    Exactly one positional argument (the URL) is required.  Usage() is
    printed and the process exits with status 2 when the options cannot be
    parsed, when -? is given, or when the positional argument count is not
    one.  If no format option is given 'text/html' is accepted; if neither
    --header nor --body is given both sections are shown.
    """
    import getopt

    # Shortcut options and the MIME type each one adds to the Accept list.
    accept_types = {
        '--html': 'text/html',
        '--text': 'text/plain',
        '--wml': 'text/vnd.wap.wml',
        '--wmlc': 'application/vnd.wap.wmlc',
        '--wbmp': 'image/vnd.wap.wbmp',
        '--gif': 'image/gif',
    }
    # Display options and the section name Get() looks for in SHOW.
    show_sections = {
        '--header': 'Head',
        '--body': 'Body',
    }

    # Use the shared constant so the default matches the --proxy help text.
    proxy = DEFAULT_PROXY
    accept = []
    show = []

    try:
        opts, args = getopt.getopt(argv[1:], "?", OPTIONS.keys())
    except getopt.error:
        # Fetch the exception via sys.exc_info() to avoid version-specific
        # except-binding syntax.
        print(sys.exc_info()[1])
        Usage()
        sys.exit(2)

    for o, v in opts:
        if o == '-?':
            Usage()
            sys.exit(2)
        elif o == '--proxy':
            proxy = v
        elif o == '--accept':
            accept.append(v)
        else:
            mime_type = accept_types.get(o)
            section = show_sections.get(o)
            if mime_type is not None:
                accept.append(mime_type)
            elif section is not None:
                show.append(section)

    if not accept:
        accept.append('text/html')
    if not show:
        show = ['Head', 'Body']

    print("Proxy is %s" % proxy)
    if len(args) != 1:
        Usage()
        sys.exit(2)

    Get(url=args[0], HTTP_PROXY=proxy, ACCEPT_HEADER=accept, SHOW=show)
# Script entry point: hand the process arguments to the command-line driver.
if __name__ == "__main__":
    FetchUrl(sys.argv)
--------- End get.py ----------------
More information about the Python-list
mailing list