object references/memory access

Alex Martelli aleax at mac.com
Sun Jul 1 17:47:48 EDT 2007


dlomsak <dlomsak at gmail.com> wrote:
   ...
> search and return takes a fraction of a second. For a large return (in
> this case 21,000 records - 8.3 MB) is taking 18 seconds. 15 of those
> seconds are spent sending the serialized results from the server to
> the client. I did a little bit of a blind experiment and doubled the

So here's a tiny example to show that the mere transfer of bytes on the
socket should be taking nowhere near that long:

#!/usr/local/bin/python
import socket, os, time, sys

# TCP port the benchmark runs on.
port = 8881
# Bytes the client writes per sendall() call ("ss" in the printed report).
sendsize = 1024
# Maximum bytes the server asks for per recv() call ("rs" in the report).
recvsize = 1024
# Total payload: 8*1024 chunks of sendsize bytes (8 MiB with sendsize=1024).
totsize = 8*1024*sendsize

def server():
    """Accept one connection and drain it, timing the transfer.

    Listens on the module-level ``port`` (all interfaces), accepts a single
    client, and reads up to ``recvsize`` bytes at a time until ``totsize``
    bytes have arrived or the peer closes the connection.

    Returns:
        A ``(bytes_received, elapsed_seconds)`` tuple.  The clock starts
        once the connection has been accepted and stops after both sockets
        have been closed.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Lets the script be re-run immediately without an
        # "address already in use" error from the previous run.
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Use the shared constant rather than repeating the literal, so
        # changing ``port`` actually changes where we listen.
        sock.bind(('', port))
        sock.listen(5)
        newSocket, address = sock.accept()
        try:
            totbytes = 0
            start = time.time()
            while totbytes < totsize:
                receivedData = newSocket.recv(recvsize)
                if not receivedData:
                    # Empty read: the peer closed before sending everything.
                    break
                totbytes += len(receivedData)
        finally:
            # Always release the connection, even if recv() raised.
            newSocket.close()
    finally:
        # Always release the listening socket, even if bind/accept raised.
        sock.close()
    return totbytes, time.time() - start

def client():
    """Connect to the local server and push ``totsize`` bytes of filler.

    The payload is written in ``sendsize``-byte chunks of ``'x'`` using
    ``sendall``, so every chunk is fully handed to the socket before the
    next one is written.  The socket is closed on exit, including on error.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Use the shared constant so client and server cannot drift apart.
        sock.connect(('localhost', port))
        # Build the chunk once; it is identical for every iteration.
        chunk = sendsize * 'x'
        totbytes = 0
        while totbytes < totsize:
            sock.sendall(chunk)
            totbytes += sendsize
    finally:
        sock.close()

def main():
    """Fork a sender and a receiver and report the transfer time.

    The parent process runs ``server()``; the forked child waits half a
    second, runs ``client()``, and exits.  The parent then reaps the child
    and prints the byte count and elapsed time returned by ``server()``.
    """
    print("moving %d bytes (ss=%d, rs=%d)" % (totsize, sendsize, recvsize))
    child_pid = os.fork()
    if child_pid == 0:
        # child process
        # Crude synchronisation: give the parent time to start listening.
        time.sleep(0.5)
        client()
        sys.exit(0)
    # parent process
    forbytes, tooktime = server()
    # Reap the child so it does not linger as a zombie.
    os.waitpid(child_pid, 0)
    print("%d bytes in %5.2f sec (ss=%d, rs=%d)" % (forbytes,
            tooktime, sendsize, recvsize))


# Run the benchmark only when executed as a script, not on import.
if __name__ == '__main__':
    main()


brain:~/downloads alex$ python sere.py 
moving 8388608 bytes (ss=1024, rs=1024)
8388608 bytes in  0.08 sec (ss=1024, rs=1024)

So, moving 8.3 MB on a bare socket should take about 100 milliseconds,
give or take.

So let's try WITH pickling and unpickling (done right):

#!/usr/local/bin/python
import socket, os, time, sys, random, cPickle

# TCP port the benchmark runs on.
port = 8881
# Upper bound on the bytes the client hands to each send() call.
sendsize = 1024
# Maximum bytes the server asks for per recv() call.
recvsize = 1024

# Payload: one million random floats, pickled once up front with protocol 2.
data = [random.random() for i in xrange(1000*1000)]
pickled_data = cPickle.dumps(data, 2)
# Total number of bytes the client sends and the server waits for.
totsize = len(pickled_data)

def server():
    """Accept one connection, receive a pickle, and unpickle it, timed.

    Listens on the module-level ``port`` (all interfaces), accepts a single
    client, and collects up to ``recvsize`` bytes per read into a list
    until ``totsize`` bytes have arrived or the peer closes.  The pieces
    are then joined and unpickled.

    Returns:
        A ``(bytes_received, elapsed_seconds)`` tuple.  The elapsed time
        covers receiving, closing the sockets, and unpickling.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Lets the script be re-run immediately without an
        # "address already in use" error from the previous run.
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Use the shared constant rather than repeating the literal.
        sock.bind(('', port))
        sock.listen(5)
        newSocket, address = sock.accept()
        try:
            totbytes = 0
            # Accumulate pieces in a list and join once at the end; growing
            # a string with += would copy the data over and over.
            recvdata = []
            start = time.time()
            while totbytes < totsize:
                receivedData = newSocket.recv(recvsize)
                if not receivedData:
                    # Empty read: the peer closed before sending everything.
                    break
                totbytes += len(receivedData)
                recvdata.append(receivedData)
        finally:
            # Always release the connection, even if recv() raised.
            newSocket.close()
    finally:
        # Always release the listening socket, even if bind/accept raised.
        sock.close()
    # SECURITY NOTE: unpickling bytes read from a socket executes whatever
    # the pickle says.  Acceptable here only because the peer is this
    # script's own client; never do this with an untrusted sender.
    # The result is not used -- unpickling is simply part of the timed work.
    # It stays bound to a local (named so it does not shadow the
    # module-level ``data``) until the function returns.
    unpickled = cPickle.loads(''.join(recvdata))
    return totbytes, time.time() - start

def client():
    """Connect to the local server and send all of ``pickled_data``.

    Each ``send`` is offered at most ``sendsize`` bytes; the offset advances
    by however many bytes ``send`` reports it actually accepted, so partial
    sends are handled.  The socket is closed on exit, including on error.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Use the shared constant so client and server cannot drift apart.
        sock.connect(('localhost', port))
        totbytes = 0
        while totbytes < totsize:
            totbytes += sock.send(pickled_data[totbytes:totbytes + sendsize])
    finally:
        sock.close()

def main():
    """Fork a sender and a receiver and report the transfer time.

    The parent process runs ``server()``; the forked child waits half a
    second, runs ``client()``, and exits.  The parent then reaps the child
    and prints the byte count and elapsed time returned by ``server()``.
    """
    print("moving %d bytes (ss=%d, rs=%d)" % (totsize, sendsize, recvsize))
    child_pid = os.fork()
    if child_pid == 0:
        # child process
        # Crude synchronisation: give the parent time to start listening.
        time.sleep(0.5)
        client()
        sys.exit(0)
    # parent process
    forbytes, tooktime = server()
    # Reap the child so it does not linger as a zombie.
    os.waitpid(child_pid, 0)
    print("%d bytes in %5.2f sec (ss=%d, rs=%d)" % (forbytes,
            tooktime, sendsize, recvsize))


# Run the benchmark only when executed as a script, not on import.
if __name__ == '__main__':
    main()


brain:~/downloads alex$ python sere.py 
moving 9002006 bytes (ss=1024, rs=1024)
9002006 bytes in  0.32 sec (ss=1024, rs=1024)

So, a bit more data, quite a bit longer, but still on the order of
magnitude of 300 milliseconds or so.

Again this suggests the problems are not "intrinsic" to the task.

It's hard to guess at exactly what it may be that you're doing wrong.
For example, if recvdata was a string (grown with +=) rather than a list
(grown with append), this would boost the runtime to 0.76 seconds; a
huge waste (more than a factor of two blown away by a minor programming
gaucheness) but still a long way from the several orders of magnitude
you're observing.

So, I suggest you try programming the interaction directly to bare
sockets, as I do here (and in several examples in Chapter 20 in "Python
in a Nutshell" 2nd edition), and see what difference that makes to your
timings.


Alex



More information about the Python-list mailing list