Python Web Servers and Page Retrievers

Subscriber123 subscriber123 at gmail.com
Sun Apr 8 17:42:33 EDT 2007


I wrote most of the following script, which is useful both for retrieving
pages from the web and for serving web pages. Because it works at such a low
level, it is much more customizable for advanced users than SimpleHTTPServer,
CGIHTTPServer, urllib, or urllib2. For example, you can easily set your own
headers when retrieving and serving pages, such as the User-Agent header,
which takes extra work to override with urllib or urllib2.
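
For instance, using the retrieve() function defined below, a request with a
custom User-Agent (plus a Host header, which many servers expect) might look
roughly like this (the host name is only a placeholder):

version,code,accepted,resphdrs,body=retrieve(
    'www.example.com',                       # placeholder host
    filename='/',
    headers={'Host':'www.example.com',
             'User-Agent':'MyLittleScript/0.1'})
print code,accepted
print body.read()      # the rest of the response, as a file object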

(sorry for the sparse comments!)

By the way, I just threw this together quickly and haven't really had time
to test retrieve() very much, so please let me know if it is buggy.
I should probably also write a dictToQuery() function; a rough sketch follows.
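
Here is roughly what I have in mind for dictToQuery(): an untested sketch
that leans on urllib.quote_plus purely for the escaping.

import urllib

def dictToQuery(d):
    """Roughly the inverse of queryToDict: {'a':'1','b':'2'} -> 'a=1&b=2'."""
    return '&'.join('%s=%s'%(urllib.quote_plus(str(k)),urllib.quote_plus(str(v)))
                    for k,v in d.items())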


import socket

host,port='',80

# module-level listening socket, used by serve() below
sock=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
sock.bind((host,port))
sock.listen(1)

def serve(function=lambda *args:(args[2],200,'OK',{},'')):
    """\
serve(function) -- serve one request.

    Calls function(method,filename,httpversion,headers,get,post),
    which must return (httpversion,code,accepted,headers,content)
    in that order. If you don't pass a function, the default is
    function=lambda *args:(args[2],200,'OK',{},'')
"""

    csock,caddr=sock.accept()
    rfile=csock.makefile('r',0)
    wfile=csock.makefile('w',0)

    # Protocol exchange - read request
    headers={}
    line=rfile.readline().strip()
    split1=line.find(' ')
    method,remainder=line[:split1].strip(),line[split1+1:].strip()
    split2=remainder.find(' ')
    filename,httpversion=remainder[:split2].strip(),remainder[split2+1:].strip()
    while 1:
        line=rfile.readline().strip()
        print line
        if line=='':
            break
        else:
            split=line.find(':')
            key,value=line[:split],line[split+1:]
            headers[key.strip()]=value.strip()

    try:
        post=rfile.read(int(headers['Content-Length']))
    except (KeyError,ValueError):
        post=''                       # no usable Content-Length, so no body
    get=queryToDict(filename)
    post=queryToDict(post)
    loc=filename.find("?")
    if loc>-1:
        filename=filename[:loc]       # strip the query string from the path
    print "get:",repr(get)
    print "post:",repr(post)

    httpversion,code,accepted,headers,content=function(method,filename,httpversion,headers,get,post)
    wfile.write("%s %s %s\r\n"%(httpversion,code,accepted))   # HTTP lines end in CRLF
    for header in list(headers):
        wfile.write("%s: %s\r\n"%(header,headers[header]))
    wfile.write("\r\n%s"%content)     # blank line, then the body
    wfile.close()
    csock.close()

def retrieve(host,port=80,method='GET',filename='/',httpversion='HTTP/1.0',headers={},post=''):
    """\
Retrieves one web page from:
    http://host:port/filename
with the given headers.
"""
    # use a fresh client socket -- the module-level sock is already
    # bound and listening for serve(), so it cannot also connect()
    csock=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    csock.connect((host,port))
    rfile=csock.makefile('r',0)
    wfile=csock.makefile('w',0)
    wfile.write("%s %s %s\r\n"%(method,filename,httpversion))
    for header in list(headers):
        wfile.write("%s: %s\r\n"%(header,headers[header]))
    wfile.write('\r\n')
    wfile.write(post)

    headers={}
    line=rfile.readline().strip()
    split1=line.find(' ')
    httpversion,remainder=line[:split1].strip(),line[split1+1:].strip()
    split2=remainder.find(' ')
    code,accepted=remainder[:split2].strip(),remainder[split2+1:].strip()
    while 1:
        line=rfile.readline().strip()
        if line=='':
            break
        else:
            split=line.find(':')
            key,value=line[:split],line[split+1:]
            headers[key.strip()]=value.strip()
    return httpversion,code,accepted,headers,rfile

def queryToDict(query):
    if '?' in query:
        query=query[query.index('?')+1:]
    kvpairs=query.split("&")
    ret={}
    for kvpair in kvpairs:
        if '=' in kvpair:
            loc=kvpair.index('=')
            key,value=kvpair[:loc],kvpair[loc+1:]
            ret[key]=value
    return ret
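
# for example, queryToDict('/search?q=python&lang=en') returns
# {'q': 'python', 'lang': 'en'}; note that values are not URL-unescaped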

if __name__=='__main__':
    i=0
    while True:
        i+=1
        print "\nserve #%d:"%i
        serve(lambda *args:(args[2],200,'OK',{'Content-Type':'text/html'},'<h1>Go Away!</h1>'))
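
If you want something a bit more useful than the "Go Away!" lambda, a handler
for serve() might look something like this (untested sketch; the handler name
and the markup are just illustrative):

def hello(method,filename,httpversion,headers,get,post):
    # echo the request path and query parameters back to the browser
    content='<h1>%s %s</h1><pre>%r</pre>'%(method,filename,get)
    return (httpversion,200,'OK',
            {'Content-Type':'text/html',
             'Content-Length':str(len(content))},
            content)

# then call serve(hello) inside the loop above instead of the lambda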