[Python-checkins] r69199 - sandbox/trunk/dbm_sqlite/alt/time_sqlite.py
raymond.hettinger
python-checkins at python.org
Mon Feb 2 07:49:12 CET 2009
Author: raymond.hettinger
Date: Mon Feb 2 07:49:12 2009
New Revision: 69199
Log:
Add a fragmentation step and a vacuum step. Record the results.
Modified:
sandbox/trunk/dbm_sqlite/alt/time_sqlite.py
Modified: sandbox/trunk/dbm_sqlite/alt/time_sqlite.py
==============================================================================
--- sandbox/trunk/dbm_sqlite/alt/time_sqlite.py (original)
+++ sandbox/trunk/dbm_sqlite/alt/time_sqlite.py Mon Feb 2 07:49:12 2009
@@ -1,16 +1,8 @@
''' Testbench and timing routines to figure-out the fastest schema for shelves.
-With unique index or primary key or no index.
- Insertion speed (do we pay a high price for reindexing on every commit)
- Individual Lookup speed (regular value, masked key, uncommitted key)
- Speed of replacing vs inserting.
- Speed of:
- 'select *'
- 'select * order by key'
- 'select * order by rowid'
- Do 'select key', 'select *', and 'select value' all produce the same order?
+Todo:
Delayed commits:
How much insertion speed is gained by n-period commits
@@ -19,19 +11,10 @@
Do 'select key', 'select *', and 'select value' all produce the same order?
-Same tests, but first fragment the snot out of the db.
-
-What about deletions and fragmentation?
-Cost and effects of vacuuming.
-BLOB vs TEXT
-
------------------------------------------------------------------
-no diff twixt "select *" and "select k,v" # prefer latter for specificity
-no diff twixt "select *" and "select * by rowid" # prefer latter for specificity
-no diff for "select *" whether unindexed, primary key, and indexed
-? do those results hold after fragmenting
-"order by key" is twice as slow as unordered, even with indexing
-effect of "primary key" and a unique index is the same
+With unique index or primary key or no index.
+ Insertion speed (do we pay a high price for reindexing on every commit)
+ Individual Lookup speed (regular value, masked key, uncommitted key)
+
'''
from random import *
@@ -54,6 +37,22 @@
CREATE UNIQUE INDEX IF NOT EXISTS keyndx ON shelf (key);
''', 'UNIQ')
+
+MAKE_shelf = ('''
+ DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
+ CREATE TABLE IF NOT EXISTS shelf (key BLOB NOT NULL, value BLOB NOT NULL);
+''', 'UNINDEXED')
+MAKE_shelf_PRIMARY = ('''
+ DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
+ CREATE TABLE IF NOT EXISTS shelf (key BLOB PRIMARY KEY, value BLOB NOT NULL);
+''', 'PRIMARY')
+MAKE_shelf_UNIQUE = ('''
+ DROP INDEX IF EXISTS keyndx; DROP TABLE IF EXISTS shelf;
+ CREATE TABLE IF NOT EXISTS shelf (key BLOB NOT NULL, value BLOB NOT NULL);
+ CREATE UNIQUE INDEX IF NOT EXISTS keyndx ON shelf (key);
+''', 'UNIQ')
+
+
SELECTORS = [
'SELECT * FROM shelf',
'SELECT * FROM shelf ORDER BY key',
@@ -88,6 +87,18 @@
conn.commit()
return conn
+def fragmentit(conn, addlist, dellist, n=20):
+ # Make random adds and deletes, n at time
+ addlist = addlist[:]
+ dellist = dellist[:]
+ while len(addlist) >= n and len(dellist) >= n:
+ for i in range(n):
+ conn.execute('DELETE FROM shelf WHERE ROWID = ?', dellist.pop())
+ conn.commit()
+ for i in range(n):
+ conn.execute('REPLACE INTO shelf (key, value) VALUES (?, ?)', addlist.pop())
+ conn.commit()
+
def timeit(conn, stmt, n=20):
conn.execute(stmt) # precompile
start = clock()
@@ -95,15 +106,133 @@
conn.execute(stmt).fetchall()
return '%.2f' % (clock() - start)
-
+n, m = 2000, 6000
+fragment, vacuum = True, True
seed('xyzpdqbingo')
-items = populate(2000)
+items = populate(n)
+dellist = [(randrange(n),) for i in range(m)]
+addlist = populate(m)
for stmt in SELECTORS:
print(stmt)
for builder, name in [MAKE_shelf, MAKE_shelf_PRIMARY, MAKE_shelf_UNIQUE]:
conn = setup(builder, items)
+ if fragment:
+ fragmentit(conn, addlist, dellist)
+ if vacuum:
+ conn.execute('VACUUM')
print(sorted(timeit(conn, stmt, n=100) for i in range(6)), name)
print()
+
-
+''' Results:
+
+No difference between 'select *', 'select k,v', and 'select * by rowid'
+ Doesn't matter if the tables are unindexed, have a primary key, or a unique index.
+Fragmented leaves timing ratios the same but takes 3.34 times longer.
+Running a VACUUM after fragmenting doesn't speed things up.
+"order by key" is twice as slow as unordered, even with indexing.
+The effect of "primary key" and a unique index is the same
+BLOB and TEXT types have the same timing
+
+
+
+Results for unfragmented: n, m = 2000, 6000
+
+SELECT * FROM shelf
+['0.38', '0.38', '0.38', '0.39', '0.39', '0.47'] UNINDEXED
+['0.38', '0.38', '0.39', '0.39', '0.39', '0.41'] PRIMARY
+['0.38', '0.38', '0.39', '0.39', '0.41', '0.42'] UNIQ
+
+SELECT * FROM shelf ORDER BY key
+['1.63', '1.63', '1.63', '1.64', '1.64', '1.74'] UNINDEXED
+['0.57', '0.57', '0.57', '0.58', '0.58', '0.63'] PRIMARY
+['0.57', '0.57', '0.57', '0.58', '0.58', '0.71'] UNIQ
+
+SELECT * FROM shelf ORDER BY rowid
+['0.38', '0.39', '0.39', '0.39', '0.39', '0.47'] UNINDEXED
+['0.38', '0.38', '0.39', '0.39', '0.40', '0.43'] PRIMARY
+['0.38', '0.38', '0.39', '0.39', '0.39', '0.44'] UNIQ
+
+SELECT key FROM shelf
+['0.26', '0.26', '0.26', '0.26', '0.26', '0.29'] UNINDEXED
+['0.26', '0.26', '0.26', '0.26', '0.26', '0.29'] PRIMARY
+['0.26', '0.26', '0.26', '0.26', '0.27', '0.35'] UNIQ
+
+SELECT value FROM shelf
+['0.26', '0.26', '0.26', '0.27', '0.27', '0.30'] UNINDEXED
+['0.26', '0.26', '0.26', '0.26', '0.27', '0.28'] PRIMARY
+['0.26', '0.26', '0.26', '0.26', '0.26', '0.32'] UNIQ
+
+SELECT key, value FROM shelf
+['0.38', '0.38', '0.38', '0.38', '0.39', '0.44'] UNINDEXED
+['0.38', '0.38', '0.38', '0.39', '0.40', '0.43'] PRIMARY
+['0.38', '0.38', '0.38', '0.39', '0.39', '0.47'] UNIQ
+
+
+----- Fragmented: n, m = 2000, 6000
+
+SELECT * FROM shelf
+['1.27', '1.28', '1.29', '1.30', '1.32', '1.39'] UNINDEXED
+['1.29', '1.30', '1.31', '1.31', '1.31', '1.45'] PRIMARY
+['1.29', '1.29', '1.29', '1.30', '1.31', '1.45'] UNIQ
+
+SELECT * FROM shelf ORDER BY key
+['5.55', '5.58', '5.58', '5.59', '5.70', '5.75'] UNINDEXED
+['1.91', '1.92', '1.92', '1.92', '1.93', '2.02'] PRIMARY
+['1.91', '1.92', '1.94', '1.94', '2.08', '2.10'] UNIQ
+
+SELECT * FROM shelf ORDER BY rowid
+['1.30', '1.30', '1.30', '1.31', '1.42', '1.48'] UNINDEXED
+['1.28', '1.29', '1.29', '1.30', '1.30', '1.42'] PRIMARY
+['1.29', '1.30', '1.31', '1.31', '1.39', '1.40'] UNIQ
+
+SELECT key FROM shelf
+['0.90', '0.91', '0.91', '0.92', '0.92', '1.07'] UNINDEXED
+['0.89', '0.91', '0.91', '0.91', '0.91', '1.02'] PRIMARY
+['0.90', '0.91', '0.91', '0.92', '0.93', '1.06'] UNIQ
+
+SELECT value FROM shelf
+['0.90', '0.91', '0.91', '0.91', '0.93', '1.06'] UNINDEXED
+['0.89', '0.91', '0.91', '0.91', '0.92', '1.00'] PRIMARY
+['0.90', '0.91', '0.91', '0.91', '0.92', '0.99'] UNIQ
+
+SELECT key, value FROM shelf
+['1.29', '1.30', '1.30', '1.30', '1.31', '1.45'] UNINDEXED
+['1.29', '1.29', '1.29', '1.29', '1.30', '1.41'] PRIMARY
+['1.29', '1.29', '1.30', '1.30', '1.30', '1.45'] UNIQ
+
+
+----- Fragmented, then vacuumed: n, m = 2000, 6000
+
+SELECT * FROM shelf
+['1.29', '1.29', '1.31', '1.31', '1.40', '1.49'] UNINDEXED
+['1.32', '1.32', '1.32', '1.33', '1.33', '1.52'] PRIMARY
+['1.28', '1.29', '1.31', '1.31', '1.32', '1.47'] UNIQ
+
+SELECT * FROM shelf ORDER BY key
+['5.76', '5.77', '5.77', '5.77', '5.77', '5.81'] UNINDEXED
+['1.91', '1.91', '1.91', '1.91', '1.92', '2.02'] PRIMARY
+['1.89', '1.90', '1.91', '1.92', '1.92', '2.07'] UNIQ
+
+SELECT * FROM shelf ORDER BY rowid
+['1.35', '1.37', '1.38', '1.51', '1.53', '1.56'] UNINDEXED
+['1.28', '1.29', '1.30', '1.30', '1.30', '1.42'] PRIMARY
+['1.32', '1.33', '1.33', '1.34', '1.34', '1.50'] UNIQ
+
+SELECT key FROM shelf
+['0.88', '0.89', '0.90', '0.90', '0.90', '1.09'] UNINDEXED
+['0.90', '0.91', '0.91', '0.92', '0.92', '1.02'] PRIMARY
+['0.89', '0.89', '0.89', '0.89', '0.90', '1.01'] UNIQ
+
+SELECT value FROM shelf
+['0.89', '0.90', '0.91', '0.91', '0.91', '1.10'] UNINDEXED
+['0.91', '0.91', '0.91', '0.91', '1.03', '1.08'] PRIMARY
+['0.90', '0.91', '0.92', '0.92', '0.92', '1.00'] UNIQ
+
+SELECT key, value FROM shelf
+['1.29', '1.30', '1.30', '1.30', '1.31', '1.46'] UNINDEXED
+['1.29', '1.29', '1.30', '1.30', '1.31', '1.38'] PRIMARY
+['1.28', '1.30', '1.30', '1.30', '1.33', '1.40'] UNIQ
+
+'''
More information about the Python-checkins
mailing list