[Python-checkins] r69199 - sandbox/trunk/dbm_sqlite/alt/time_sqlite.py

raymond.hettinger python-checkins at python.org
Mon Feb 2 07:49:12 CET 2009


Author: raymond.hettinger
Date: Mon Feb  2 07:49:12 2009
New Revision: 69199

Log:
Add a fragmentation step and a vacuum step.  Record the results.

Modified:
   sandbox/trunk/dbm_sqlite/alt/time_sqlite.py

Modified: sandbox/trunk/dbm_sqlite/alt/time_sqlite.py
==============================================================================
--- sandbox/trunk/dbm_sqlite/alt/time_sqlite.py	(original)
+++ sandbox/trunk/dbm_sqlite/alt/time_sqlite.py	Mon Feb  2 07:49:12 2009
@@ -1,16 +1,8 @@
 ''' Testbench and timing routines to figure-out the fastest schema for shelves.
 
-With unique index or primary key or no index.
-    Insertion speed (do we pay a high price for reindexing on every commit)
-    Individual Lookup speed (regular value, masked key, uncommitted key)
-    Speed of replacing vs inserting.
 
-    Speed of:
-            'select *'
-            'select * order by key'
-            'select * order by rowid'
 
-    Do 'select key', 'select *', and 'select value' all produce the same order?
+Todo:
 
 Delayed commits:
     How much insertion speed is gained by n-period commits
@@ -19,19 +11,10 @@
 
 Do 'select key', 'select *', and 'select value' all produce the same order?
 
-Same tests, but first fragment the snot out of the db.
-    
-What about deletions and fragmentation?
-Cost and effects of vacuuming.
-BLOB vs TEXT
-
------------------------------------------------------------------
-no diff twixt "select *" and "select k,v"           # prefer latter for specificity
-no diff twixt "select *" and "select * by rowid"    # prefer latter for specificity
-no diff for "select *" whether unindexed, primary key, and indexed
-? do those results hold after fragmenting
-"order by key" is twice as slow as unordered, even with indexing
-effect of "primary key" and a unique index is the same
+With unique index or primary key or no index.
+    Insertion speed (do we pay a high price for reindexing on every commit)
+    Individual Lookup speed (regular value, masked key, uncommitted key)
+
 '''
 
 from random import *
@@ -54,6 +37,22 @@
     CREATE UNIQUE INDEX IF NOT EXISTS keyndx ON shelf (key);
 ''', 'UNIQ')
 
+
+MAKE_shelf = ('''
+    DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
+    CREATE TABLE IF NOT EXISTS shelf (key BLOB NOT NULL, value BLOB NOT NULL);
+''', 'UNINDEXED')
+MAKE_shelf_PRIMARY = ('''
+    DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
+    CREATE TABLE IF NOT EXISTS shelf (key BLOB PRIMARY KEY, value BLOB NOT NULL);
+''', 'PRIMARY')
+MAKE_shelf_UNIQUE = ('''
+    DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
+    CREATE TABLE IF NOT EXISTS shelf (key BLOB NOT NULL, value BLOB NOT NULL);
+    CREATE UNIQUE INDEX IF NOT EXISTS keyndx ON shelf (key);
+''', 'UNIQ')
+
+
 SELECTORS = [
     'SELECT * FROM shelf',
     'SELECT * FROM shelf ORDER BY key',
@@ -88,6 +87,18 @@
     conn.commit()
     return conn
 
+def fragmentit(conn, addlist, dellist, n=20):
+    # Make random adds and deletes, n at time
+    addlist = addlist[:]
+    dellist = dellist[:]
+    while len(addlist) >= n and len(dellist) >= n:
+        for i in range(n):
+            conn.execute('DELETE FROM shelf WHERE ROWID = ?', dellist.pop())
+            conn.commit()
+        for i in range(n):
+            conn.execute('REPLACE INTO shelf (key, value) VALUES (?, ?)', addlist.pop())
+            conn.commit()
+
 def timeit(conn, stmt, n=20):
     conn.execute(stmt)          # precompile
     start = clock()   
@@ -95,15 +106,133 @@
         conn.execute(stmt).fetchall()
     return '%.2f' % (clock() - start)
     
-
+n, m = 2000, 6000
+fragment, vacuum = True, True
 seed('xyzpdqbingo')
-items = populate(2000)
+items = populate(n)
+dellist = [(randrange(n),) for i in range(m)]
+addlist = populate(m)
 for stmt in SELECTORS:
     print(stmt)    
     for builder, name in [MAKE_shelf, MAKE_shelf_PRIMARY, MAKE_shelf_UNIQUE]:
         conn = setup(builder, items)
+        if fragment:
+            fragmentit(conn, addlist, dellist)
+        if vacuum:
+            conn.execute('VACUUM')
         print(sorted(timeit(conn, stmt, n=100) for i in range(6)), name)
     print()
 
+
     
-    
+''' Results:
+
+No difference between 'select *', 'select k,v', and 'select * by rowid'
+    Doesn't matter if the tables are unindexed, have a primary key, or a unique index.
+Fragmented leaves timing ratios the same but takes 3.34 times longer.
+Running a VACUUM after fragmenting doesn't speed things up.
+"order by key" is twice as slow as unordered, even with indexing.
+The effect of "primary key" and a unique index is the same
+BLOB and TEXT types have the same timing
+
+
+
+Results for unfragmented:  n, m = 2000, 6000
+
+SELECT * FROM shelf
+['0.38', '0.38', '0.38', '0.39', '0.39', '0.47'] UNINDEXED
+['0.38', '0.38', '0.39', '0.39', '0.39', '0.41'] PRIMARY
+['0.38', '0.38', '0.39', '0.39', '0.41', '0.42'] UNIQ
+
+SELECT * FROM shelf ORDER BY key
+['1.63', '1.63', '1.63', '1.64', '1.64', '1.74'] UNINDEXED
+['0.57', '0.57', '0.57', '0.58', '0.58', '0.63'] PRIMARY
+['0.57', '0.57', '0.57', '0.58', '0.58', '0.71'] UNIQ
+
+SELECT * FROM shelf ORDER BY rowid
+['0.38', '0.39', '0.39', '0.39', '0.39', '0.47'] UNINDEXED
+['0.38', '0.38', '0.39', '0.39', '0.40', '0.43'] PRIMARY
+['0.38', '0.38', '0.39', '0.39', '0.39', '0.44'] UNIQ
+
+SELECT key FROM shelf
+['0.26', '0.26', '0.26', '0.26', '0.26', '0.29'] UNINDEXED
+['0.26', '0.26', '0.26', '0.26', '0.26', '0.29'] PRIMARY
+['0.26', '0.26', '0.26', '0.26', '0.27', '0.35'] UNIQ
+
+SELECT value FROM shelf
+['0.26', '0.26', '0.26', '0.27', '0.27', '0.30'] UNINDEXED
+['0.26', '0.26', '0.26', '0.26', '0.27', '0.28'] PRIMARY
+['0.26', '0.26', '0.26', '0.26', '0.26', '0.32'] UNIQ
+
+SELECT key, value FROM shelf
+['0.38', '0.38', '0.38', '0.38', '0.39', '0.44'] UNINDEXED
+['0.38', '0.38', '0.38', '0.39', '0.40', '0.43'] PRIMARY
+['0.38', '0.38', '0.38', '0.39', '0.39', '0.47'] UNIQ
+
+
+----- Fragmented:  n, m = 2000, 6000
+
+SELECT * FROM shelf
+['1.27', '1.28', '1.29', '1.30', '1.32', '1.39'] UNINDEXED
+['1.29', '1.30', '1.31', '1.31', '1.31', '1.45'] PRIMARY
+['1.29', '1.29', '1.29', '1.30', '1.31', '1.45'] UNIQ
+
+SELECT * FROM shelf ORDER BY key
+['5.55', '5.58', '5.58', '5.59', '5.70', '5.75'] UNINDEXED
+['1.91', '1.92', '1.92', '1.92', '1.93', '2.02'] PRIMARY
+['1.91', '1.92', '1.94', '1.94', '2.08', '2.10'] UNIQ
+
+SELECT * FROM shelf ORDER BY rowid
+['1.30', '1.30', '1.30', '1.31', '1.42', '1.48'] UNINDEXED
+['1.28', '1.29', '1.29', '1.30', '1.30', '1.42'] PRIMARY
+['1.29', '1.30', '1.31', '1.31', '1.39', '1.40'] UNIQ
+
+SELECT key FROM shelf
+['0.90', '0.91', '0.91', '0.92', '0.92', '1.07'] UNINDEXED
+['0.89', '0.91', '0.91', '0.91', '0.91', '1.02'] PRIMARY
+['0.90', '0.91', '0.91', '0.92', '0.93', '1.06'] UNIQ
+
+SELECT value FROM shelf
+['0.90', '0.91', '0.91', '0.91', '0.93', '1.06'] UNINDEXED
+['0.89', '0.91', '0.91', '0.91', '0.92', '1.00'] PRIMARY
+['0.90', '0.91', '0.91', '0.91', '0.92', '0.99'] UNIQ
+
+SELECT key, value FROM shelf
+['1.29', '1.30', '1.30', '1.30', '1.31', '1.45'] UNINDEXED
+['1.29', '1.29', '1.29', '1.29', '1.30', '1.41'] PRIMARY
+['1.29', '1.29', '1.30', '1.30', '1.30', '1.45'] UNIQ
+
+
+----- Fragmented, then vacuumed:  n, m = 2000, 6000
+
+SELECT * FROM shelf
+['1.29', '1.29', '1.31', '1.31', '1.40', '1.49'] UNINDEXED
+['1.32', '1.32', '1.32', '1.33', '1.33', '1.52'] PRIMARY
+['1.28', '1.29', '1.31', '1.31', '1.32', '1.47'] UNIQ
+
+SELECT * FROM shelf ORDER BY key
+['5.76', '5.77', '5.77', '5.77', '5.77', '5.81'] UNINDEXED
+['1.91', '1.91', '1.91', '1.91', '1.92', '2.02'] PRIMARY
+['1.89', '1.90', '1.91', '1.92', '1.92', '2.07'] UNIQ
+
+SELECT * FROM shelf ORDER BY rowid
+['1.35', '1.37', '1.38', '1.51', '1.53', '1.56'] UNINDEXED
+['1.28', '1.29', '1.30', '1.30', '1.30', '1.42'] PRIMARY
+['1.32', '1.33', '1.33', '1.34', '1.34', '1.50'] UNIQ
+
+SELECT key FROM shelf
+['0.88', '0.89', '0.90', '0.90', '0.90', '1.09'] UNINDEXED
+['0.90', '0.91', '0.91', '0.92', '0.92', '1.02'] PRIMARY
+['0.89', '0.89', '0.89', '0.89', '0.90', '1.01'] UNIQ
+
+SELECT value FROM shelf
+['0.89', '0.90', '0.91', '0.91', '0.91', '1.10'] UNINDEXED
+['0.91', '0.91', '0.91', '0.91', '1.03', '1.08'] PRIMARY
+['0.90', '0.91', '0.92', '0.92', '0.92', '1.00'] UNIQ
+
+SELECT key, value FROM shelf
+['1.29', '1.30', '1.30', '1.30', '1.31', '1.46'] UNINDEXED
+['1.29', '1.29', '1.30', '1.30', '1.31', '1.38'] PRIMARY
+['1.28', '1.30', '1.30', '1.30', '1.33', '1.40'] UNIQ
+
+'''


More information about the Python-checkins mailing list