Python Google Server

Tue Apr 5 06:55:47 EDT 2005

fuzzyman at gmail.com wrote:

lol, cool hack!! Make a Slashdot article about it!!

> I've hacked together a 'GoogleCacheServer'. It is based on
> SimpleHTTPServer. Run the following script (hopefully google groups
> won't mangle the indentation) and set your browser proxy settings to
> 'localhost:8000'. It will let you browse the internet using google's
> cache. Obviously you'll miss images, javascript, css files, etc.
> 
> See the world as google sees it !
> 
> (This is actually an 'inventive' short term measure to get round a
> restrictive internet policy at work :-) I'll probably put it in the
> Python Cookbook as it's quite fun (so if line lengths or indentation is
> mangled here, try there). Tested on Windows XP, with Python 2.3 and IE.
> 
> 
> 
> # Copyright Michael Foord, 2004 & 2005.
> # Released subject to the BSD License
> # Please see http://www.voidspace.org.uk/documents/BSD-LICENSE.txt
> 
> # For information about bugfixes, updates and support, please join the
> # Pythonutils mailing list.
> # http://voidspace.org.uk/mailman/listinfo/pythonutils_voidspace.org.uk
> # Comments, suggestions and bug reports welcome.
> # Scripts maintained at http://www.voidspace.org.uk/python/index.shtml
> # E-mail fuzzyman at voidspace.org.uk
> 
> import google
> import BaseHTTPServer
> import shutil
> from StringIO import StringIO
> import urlparse
> 
> __version__ = '0.1.0'
> 
> 
> """
> This is a simple implementation of a server that fetches web pages
> from the google cache.
> 
> It lets you explore the internet from your browser, using the google
> cache.
> 
> Run this script and then set your browser proxy settings to
> localhost:8000
> 
> Needs google.py (and a google license key).
> See http://pygoogle.sourceforge.net/
> and http://www.google.com/apis/
> """
> 
> cached_types = ['txt', 'html', 'htm', 'shtml', 'shtm', 'cgi', 'pl',
> 'py']
> google.setLicense(google.getLicense())
> googlemarker = '''<i>Google is not affiliated with the authors of this
> page nor responsible for its
>
content.</i></font></center></td></tr></table></td></tr></table>\n<hr>\n'''
> markerlen = len(googlemarker)
> 
> class googleCacheHandler(BaseHTTPServer.BaseHTTPRequestHandler):
>     server_version = "googleCache/" + __version__
>     cached_types = cached_types
>     googlemarker = googlemarker
>     markerlen = markerlen
> 
>     def do_GET(self):
>         f = self.send_head()
>         if f:
>             self.copyfile(f, self.wfile)
>             f.close()
> 
>     def send_head(self):
>         """Common code for GET and HEAD commands.
> 
>         This sends the response code and MIME headers.
> 
>         Return value is either a file object (which has to be copied
>         to the outputfile by the caller unless the command was HEAD,
>         and must be closed by the caller under all circumstances), or
>         None, in which case the caller has nothing further to do.
> 
>         """
>         print self.path
>         url = urlparse.urlparse(self.path)[2]
>         dotloc = url.find('.') + 1
>         if dotloc and url[dotloc:] not in self.cached_types:
>             return None     # not a cached type - don't even try
> 
>         thepage = google.doGetCachedPage(self.path)
>         headerpos = thepage.find(self.googlemarker)
>         if headerpos != -1: # remove the google header
>             pos = self.markerlen + headerpos
>             thepage = thepage[pos:]
> 
>         f = StringIO(thepage)
> 
>         self.send_response(200)
>         self.send_header("Content-type", 'text/html')
>         self.send_header("Content-Length", str(len(thepage)))
>         self.end_headers()
>         return f
> 
>     def copyfile(self, source, outputfile):
>         shutil.copyfileobj(source, outputfile)
> 
> 
> def test(HandlerClass = googleCacheHandler,
>          ServerClass = BaseHTTPServer.HTTPServer):
>     BaseHTTPServer.test(HandlerClass, ServerClass)
> 
> 
> if __name__ == '__main__':
>     test()
>