[Python-checkins] r68295 - sandbox/trunk/dbm_sqlite/Lib/dbm/sqlite.py

skip.montanaro python-checkins at python.org
Sun Jan 4 13:11:54 CET 2009


Author: skip.montanaro
Date: Sun Jan  4 13:11:54 2009
New Revision: 68295

Log:
Rework performance measurements a bit.
Only commit once every 100 calls to __setitem__.
Explicitly commit on close.


Modified:
   sandbox/trunk/dbm_sqlite/Lib/dbm/sqlite.py

Modified: sandbox/trunk/dbm_sqlite/Lib/dbm/sqlite.py
==============================================================================
--- sandbox/trunk/dbm_sqlite/Lib/dbm/sqlite.py	(original)
+++ sandbox/trunk/dbm_sqlite/Lib/dbm/sqlite.py	Sun Jan  4 13:11:54 2009
@@ -6,116 +6,149 @@
 * Obvious speed problems (all tests performed on 2.2GHz MacBook Pro running
   OSX 10.5.4 with SQLite 3.6.2):
 
-  - Read performance:
-    # Using a file...
+    PYTHON=python3.0
+
+    $PYTHON -c 'import sqlite3
+    print("module version:", sqlite3.version_info)
+    print("sqlite version:", sqlite3.sqlite_version_info)
+    '
+
+    echo
+    echo 'read from file'
     for m in dumb gnu ndbm sqlite ; do
         echo $m
         for n in 10 100 ; do
             rm -f /tmp/trash.db*
-            python3.0 -m timeit -s 'import dbm.'$m' as db' \
-                                -s 'f = db.open("/tmp/trash.db", "c")' \
-                                -s 'f[b"1"] = b"a"' \
-                                'for i in range('$n'): x = f[b"1"]'
-        done
-    done
-    dumb
-    100 loops, best of 3: 4.06 msec per loop
-    10 loops, best of 3: 43.8 msec per loop
-    gnu
-    1000 loops, best of 3: 304 usec per loop
-    100 loops, best of 3: 2.98 msec per loop
-    ndbm
-    10000 loops, best of 3: 29.1 usec per loop
-    1000 loops, best of 3: 299 usec per loop
-    sqlite
-    10 loops, best of 3: 25.9 msec per loop
-    10 loops, best of 3: 271 msec per loop
-    # Using :memory:
+            $PYTHON -m timeit -s 'import dbm.'$m' as db' \
+                              -s 'f = db.open("/tmp/trash.db", "c")' \
+                              -s 'f[b"1"] = b"a"' \
+                              'for i in range('$n'): x = f[b"1"]'
+        done
+    done
+
+    echo
+    echo 'read from :memory:'
     for m in sqlite ; do
         echo $m
         for n in 10 100 ; do
-            python3.0 -m timeit -s 'import dbm.'$m' as db' \
-                                -s 'f = db.open(":memory:", "c")' \
-                                -s 'f[b"1"] = b"a"' \
-                                'for i in range('$n'): x = f[b"1"]'
+            $PYTHON -m timeit -s 'import dbm.'$m' as db' \
+                              -s 'f = db.open(":memory:", "c")' \
+                              -s 'f[b"1"] = b"a"' \
+                              'for i in range('$n'): x = f[b"1"]'
         done
     done
-    sqlite
-    1000 loops, best of 3: 249 usec per loop
-    100 loops, best of 3: 2.48 msec per loop
 
-  - Write performance:
-    # Using a file...
+    echo
+    echo 'write to file'
     for m in dumb gnu ndbm sqlite ; do
         echo $m
         for n in 10 100 ; do
             rm -f /tmp/trash.db*
-            python3.0 -m timeit -s 'import dbm.'$m' as db' \
-                      -s 'f = db.open("/tmp/trash.db", "c")' \
-                      'for i in range('$n'): f[bytes(str(i), "ascii")] = bytes(str(i), "ascii")'
-        done
-    done
-    100 loops, best of 3: 3.92 msec per loop
-    10 loops, best of 3: 41.9 msec per loop
-    gnu
-    1000 loops, best of 3: 301 usec per loop
-    100 loops, best of 3: 3.02 msec per loop
-    ndbm
-    10000 loops, best of 3: 28.9 usec per loop
-    1000 loops, best of 3: 296 usec per loop
-    sqlite
-    10 loops, best of 3: 22.5 msec per loop
-    10 loops, best of 3: 278 msec per loop
-    # Using :memory:
+            $PYTHON -m timeit -s 'import dbm.'$m' as db' \
+                    -s 'f = db.open("/tmp/trash.db", "c")' \
+                    'for i in range('$n'): f[bytes(str(i), "ascii")] = bytes(str(i), "ascii")'
+        done
+    done
+
+    echo
+    echo 'write to :memory:'
     for m in sqlite ; do
         echo $m
         for n in 10 100 ; do
-            python3.0 -m timeit -s 'import dbm.'$m' as db' \
-                                -s 'f = db.open(":memory:", "c")' \
-                                'for i in range('$n'): f[bytes(str(i), "ascii")] = bytes(str(i), "ascii")'
+            $PYTHON -m timeit -s 'import dbm.'$m' as db' \
+                              -s 'f = db.open(":memory:", "c")' \
+                              'for i in range('$n'): f[bytes(str(i), "ascii")] = bytes(str(i), "ascii")'
         done
     done
-    sqlite
-    1000 loops, best of 3: 452 usec per loop
-    100 loops, best of 3: 4.95 msec per loop
 
-  - Keys:
-    # Using a file...
+    echo
+    echo 'keys from file'
     for m in dumb gnu ndbm sqlite ; do
         echo $m
         for n in 10 100 ; do
             rm -f /tmp/trash.db*
-            python3.0 -m timeit -s 'import dbm.'$m' as db' \
-                                -s 'f = db.open("/tmp/trash.db", "c")' \
-                                -s 'f[b"1"] = b"a"' \
-                                'for i in range('$n'): x = f.keys()'
-        done
-    done
-    dumb
-    10000 loops, best of 3: 21.6 usec per loop
-    1000 loops, best of 3: 210 usec per loop
-    gnu
-    10000 loops, best of 3: 36.6 usec per loop
-    1000 loops, best of 3: 349 usec per loop
-    ndbm
-    100000 loops, best of 3: 5.49 usec per loop
-    10000 loops, best of 3: 50.8 usec per loop
-    sqlite
-    1000 loops, best of 3: 562 usec per loop
-    100 loops, best of 3: 5.55 msec per loop
-    # Using :memory:
+            $PYTHON -m timeit -s 'import dbm.'$m' as db' \
+                              -s 'f = db.open("/tmp/trash.db", "c")' \
+                              -s 'f[b"1"] = b"a"' \
+                              'for i in range('$n'): x = f.keys()'
+        done
+    done
+
+    echo
+    echo 'keys from :memory:'
     for m in sqlite ; do
         echo $m
         for n in 10 100 ; do
-            python3.0 -m timeit -s 'import dbm.'$m' as db' \
-                                -s 'f = db.open(":memory:", "c")' \
-                                -s 'f[b"1"] = b"a"' \
-                                'for i in range('$n'): x = f.keys()'
-        done
-    done
-    sqlite
-    10000 loops, best of 3: 185 usec per loop
-    1000 loops, best of 3: 1.85 msec per loop
+            $PYTHON -m timeit -s 'import dbm.'$m' as db' \
+                              -s 'f = db.open(":memory:", "c")' \
+                              -s 'f[b"1"] = b"a"' \
+                              'for i in range('$n'): x = f.keys()'
+        done
+    done
+
+As of 2009-01-04 I get this output on a 2.2GHz MacBook Pro:
+
+    module version: (2, 4, 1)
+    sqlite version: (3, 6, 2)
+
+    read from file
+    dumb 10: 1000 loops, best of 3: 1.03 msec per loop
+    dumb 100: 100 loops, best of 3: 10.2 msec per loop
+    dumb 1000: 10 loops, best of 3: 103 msec per loop
+    gnu 10: 100000 loops, best of 3: 4.32 usec per loop
+    gnu 100: 10000 loops, best of 3: 38.8 usec per loop
+    gnu 1000: 1000 loops, best of 3: 394 usec per loop
+    ndbm 10: 100000 loops, best of 3: 3.54 usec per loop
+    ndbm 100: 10000 loops, best of 3: 31.2 usec per loop
+    ndbm 1000: 1000 loops, best of 3: 321 usec per loop
+    sqlite 10: 1000 loops, best of 3: 261 usec per loop
+    sqlite 100: 100 loops, best of 3: 2.53 msec per loop
+    sqlite 1000: 10 loops, best of 3: 25.7 msec per loop
+
+    read from :memory:
+    sqlite 10: 1000 loops, best of 3: 237 usec per loop
+    sqlite 100: 100 loops, best of 3: 2.31 msec per loop
+    sqlite 1000: 10 loops, best of 3: 23.1 msec per loop
+
+    write to file
+    dumb 10: 100 loops, best of 3: 3.88 msec per loop
+    dumb 100: 10 loops, best of 3: 42.4 msec per loop
+    dumb 1000: 10 loops, best of 3: 424 msec per loop
+    gnu 10: 1000 loops, best of 3: 295 usec per loop
+    gnu 100: 100 loops, best of 3: 2.94 msec per loop
+    gnu 1000: 10 loops, best of 3: 30 msec per loop
+    ndbm 10: 10000 loops, best of 3: 27.3 usec per loop
+    ndbm 100: 1000 loops, best of 3: 281 usec per loop
+    ndbm 1000: 100 loops, best of 3: 6.07 msec per loop
+    sqlite 10: 1000 loops, best of 3: 764 usec per loop
+    sqlite 100: 100 loops, best of 3: 8.39 msec per loop
+    sqlite 1000: 10 loops, best of 3: 95.9 msec per loop
+
+    write to :memory:
+    sqlite 10: 1000 loops, best of 3: 295 usec per loop
+    sqlite 100: 100 loops, best of 3: 3.3 msec per loop
+    sqlite 1000: 10 loops, best of 3: 35.1 msec per loop
+
+    keys from file
+    dumb 10: 100000 loops, best of 3: 14.7 usec per loop
+    dumb 100: 10000 loops, best of 3: 142 usec per loop
+    dumb 1000: 1000 loops, best of 3: 1.44 msec per loop
+    gnu 10: 10000 loops, best of 3: 35.9 usec per loop
+    gnu 100: 1000 loops, best of 3: 354 usec per loop
+    gnu 1000: 100 loops, best of 3: 3.52 msec per loop
+    ndbm 10: 100000 loops, best of 3: 5.72 usec per loop
+    ndbm 100: 10000 loops, best of 3: 52.5 usec per loop
+    ndbm 1000: 1000 loops, best of 3: 534 usec per loop
+    sqlite 10: 10000 loops, best of 3: 181 usec per loop
+    sqlite 100: 1000 loops, best of 3: 1.8 msec per loop
+    sqlite 1000: 100 loops, best of 3: 18.3 msec per loop
+
+    keys from :memory:
+    sqlite 10: 10000 loops, best of 3: 165 usec per loop
+    sqlite 100: 1000 loops, best of 3: 1.67 msec per loop
+    sqlite 1000: 100 loops, best of 3: 16.6 msec per loop
+
+Note the poor performance of sqlite compared to dbm.dumb while reading keys.
 """
 
 import sqlite3
@@ -131,6 +164,7 @@
         self._filename = filename
         self._conn = sqlite3.connect(filename)
         self.initialize_table()
+        self._writes = 0
 
     def initialize_table(self):
         c = self._conn.cursor()
@@ -152,7 +186,9 @@
     def __setitem__(self, key, val):
         c = self._conn.cursor()
         c.execute("replace into dict (key, value) values (?, ?)", (key, val))
-        self._conn.commit()
+        self._writes += 1
+        if self._writes % 100 == 0:
+            self._conn.commit()
 
     def __delitem__(self, key):
         # Complain if it's not there.
@@ -163,18 +199,18 @@
 
     def iterkeys(self):
         c = self._conn.cursor()
-        c.execute("select key from dict order by key")
+        c.execute("select key from dict order by rowid")
         return (e[0] for e in c)
     __iter__ = iterkeys
 
     def itervalues(self):
         c = self._conn.cursor()
-        c.execute("select value from dict order by key")
+        c.execute("select value from dict order by rowid")
         return (e[0] for e in c)
 
     def iteritems(self):
         c = self._conn.cursor()
-        c.execute("select key, value from dict order by key")
+        c.execute("select key, value from dict order by rowid")
         return (e for e in c)
 
     def __contains__(self, key):
@@ -201,6 +237,7 @@
 
     def close(self):
         if self._conn is not None:
+            self._conn.commit()
             self._conn.close()
         self._conn = None
 


More information about the Python-checkins mailing list