Urllib2/Pycurl/HTTP Speeds?

Chaos psnim2000 at gmail.com
Tue Aug 8 11:42:34 EDT 2006


For the program I am making, I send multiple HTTP requests to my server.
I found that using urllib2 was pretty slow: when I activated the
program and tested it, it would hang for 2–5 seconds, and since I am
doing this multiple times I wanted to speed it up by using pycurl. But I
got the same thing. Here is my code:

import StringIO
import cookielib
import io
import os.path
import urllib

import pycurl

class GoToPage:
    """Thin wrapper around a single persistent pycurl handle.

    One ``pycurl.Curl()`` object lives for the lifetime of the instance so
    that repeated GET/POST requests can reuse the same TCP connection
    (HTTP keep-alive) instead of paying connection setup on every call —
    this is where the speed-up over one-shot requests comes from.
    """

    # Common User-Agent strings the caller may assign to self.browser.
    FireFox_15 = ("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; "
                  "rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6")
    IE7_B2 = ("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; "
              ".NET CLR 1.1.4322)")
    Opera_85 = ("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) "
                "Opera 8.51")
    IE6 = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"
    Mozilla_17 = ("Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; "
                  "rv:1.7b) Gecko/20040404")

    def __init__(self, myName):
        """Create the reusable handle.

        myName doubles as the on-disk cookie file name passed to
        libcurl via COOKIEFILE.
        """
        self.browser = self.FireFox_15
        self.lastPage = ""
        self.cookies = ""
        self.name = myName
        # One handle, created once and never closed per-request: closing
        # it after each perform() (as the original did) kills the handle
        # for all later requests AND discards the kept-alive connection.
        self.wrapper = pycurl.Curl()
        self.wrapper.setopt(pycurl.COOKIEFILE, self.name)

    def buildHeaders(self, browser, referer=""):
        """Return the request-header list; append Referer when given.

        The ``browser`` parameter is kept for backward compatibility with
        existing callers; the User-Agent actually sent is self.browser.
        """
        headers = ['User-agent: ' + self.browser,
                   'Accept-Language: en-us',
                   'Accept-Encoding: gzip,compress;q=0.9,deflate;q=0']
        if referer != "":
            # BUG FIX: original emitted 'Referer:<url>' with no space.
            headers.append('Referer: ' + referer)
        return headers

    def saveCookies(self, cookies):
        """Persist a cookie string to the file named self.name."""
        fileHandle = open(self.name, 'w')
        try:
            fileHandle.write(cookies)
        finally:
            # BUG FIX: close even if write() raises (no handle leak).
            fileHandle.close()

    def GetPage(self, URL, referer=""):
        """GET ``URL`` and return the response body as bytes."""
        theHeaders = self.buildHeaders(self.browser, referer)
        body = io.BytesIO()  # pycurl delivers raw bytes to WRITEFUNCTION
        self.wrapper.setopt(pycurl.URL, URL)
        # BUG FIX: reset the verb — a reused handle stays in POST mode
        # after any earlier PostPage() call.
        self.wrapper.setopt(pycurl.HTTPGET, 1)
        self.wrapper.setopt(pycurl.HTTPHEADER, theHeaders)
        self.wrapper.setopt(pycurl.WRITEFUNCTION, body.write)
        # BUG FIX: no self.wrapper.close() here — the handle must stay
        # open so subsequent requests work and reuse the connection.
        self.wrapper.perform()
        return body.getvalue()

    def PostPage(self, URL, data, referer=""):
        """POST ``data`` to ``URL`` and return the response body as bytes."""
        theHeaders = self.buildHeaders(self.browser, referer)
        body = io.BytesIO()
        self.wrapper.setopt(pycurl.URL, URL)
        self.wrapper.setopt(pycurl.POSTFIELDS, data)
        self.wrapper.setopt(pycurl.HTTPHEADER, theHeaders)
        self.wrapper.setopt(pycurl.WRITEFUNCTION, body.write)
        # BUG FIX: removed wx.StopWatch() debug timing — wx was never
        # imported, so every call raised NameError.  BUG FIX: the handle
        # is no longer closed after perform() (see GetPage).
        self.wrapper.perform()
        return body.getvalue()

The urllib2 version of the source is lost, and there are leftover timer
functions in the code above. I call it like this:

import GoToPage
newHTTP = GoToPage.GoToPage("name")




More information about the Python-list mailing list