Python Google Server
vegetax
vegeta.z at gmail.com
Tue Apr 5 06:55:47 EDT 2005
fuzzyman at gmail.com wrote:
Lol, cool hack!! Make a Slashdot article about it!!
> I've hacked together a 'GoogleCacheServer'. It is based on
> SimpleHTTPServer. Run the following script (hopefully google groups
> won't mangle the indentation) and set your browser proxy settings to
> 'localhost:8000'. It will let you browse the internet using google's
> cache. Obviously you'll miss images, javascript, css files, etc.
>
> See the world as google sees it !
>
> (This is actually an 'inventive' short-term measure to get round a
> restrictive internet policy at work :-) I'll probably put it in the
> Python Cookbook as it's quite fun (so if line lengths or indentation are
> mangled here, try there). Tested on Windows XP, with Python 2.3 and IE.
>
>
>
> # Copyright Michael Foord, 2004 & 2005.
> # Released subject to the BSD License
> # Please see http://www.voidspace.org.uk/documents/BSD-LICENSE.txt
>
> # For information about bugfixes, updates and support, please join the
> # Pythonutils mailing list.
> # http://voidspace.org.uk/mailman/listinfo/pythonutils_voidspace.org.uk
> # Comments, suggestions and bug reports welcome.
> # Scripts maintained at http://www.voidspace.org.uk/python/index.shtml
> # E-mail fuzzyman at voidspace.org.uk
>
> import google
> import BaseHTTPServer
> import shutil
> from StringIO import StringIO
> import urlparse
>
> __version__ = '0.1.0'
>
>
> """
> This is a simple implementation of a server that fetches web pages
> from the google cache.
>
> It lets you explore the internet from your browser, using the google
> cache.
>
> Run this script and then set your browser proxy settings to
> localhost:8000
>
> Needs google.py (and a google license key).
> See http://pygoogle.sourceforge.net/
> and http://www.google.com/apis/
> """
>
> cached_types = ['txt', 'html', 'htm', 'shtml', 'shtm', 'cgi', 'pl',
> 'py']
> google.setLicense(google.getLicense())
> googlemarker = '''<i>Google is not affiliated with the authors of this
> page nor responsible for its
>
content.</i></font></center></td></tr></table></td></tr></table>\n<hr>\n'''
> markerlen = len(googlemarker)
>
> class googleCacheHandler(BaseHTTPServer.BaseHTTPRequestHandler):
> server_version = "googleCache/" + __version__
> cached_types = cached_types
> googlemarker = googlemarker
> markerlen = markerlen
>
> def do_GET(self):
> f = self.send_head()
> if f:
> self.copyfile(f, self.wfile)
> f.close()
>
> def send_head(self):
> """Common code for GET and HEAD commands.
>
> This sends the response code and MIME headers.
>
> Return value is either a file object (which has to be copied
> to the outputfile by the caller unless the command was HEAD,
> and must be closed by the caller under all circumstances), or
> None, in which case the caller has nothing further to do.
>
> """
> print self.path
> url = urlparse.urlparse(self.path)[2]
> dotloc = url.find('.') + 1
> if dotloc and url[dotloc:] not in self.cached_types:
> return None # not a cached type - don't even try
>
> thepage = google.doGetCachedPage(self.path)
> headerpos = thepage.find(self.googlemarker)
> if headerpos != -1: # remove the google header
> pos = self.markerlen + headerpos
> thepage = thepage[pos:]
>
> f = StringIO(thepage)
>
> self.send_response(200)
> self.send_header("Content-type", 'text/html')
> self.send_header("Content-Length", str(len(thepage)))
> self.end_headers()
> return f
>
> def copyfile(self, source, outputfile):
> shutil.copyfileobj(source, outputfile)
>
>
> def test(HandlerClass = googleCacheHandler,
> ServerClass = BaseHTTPServer.HTTPServer):
> BaseHTTPServer.test(HandlerClass, ServerClass)
>
>
> if __name__ == '__main__':
> test()
>
More information about the Python-list
mailing list