How to test if one dict is subset of another?

Peter Otten __peter__ at web.de
Mon Feb 19 06:54:14 EST 2007


Jay Tee wrote:

> On Feb 19, 11:07 am, Peter Otten <__pete... at web.de> wrote:
> 
>> Use a RDBMS (a database), they tend to be good at this kind of
>> operations.
> 
> yeah, one of the options is metakit ... sqlite and buzhug both looked
> promising but the constraint of pythons 2.2 and 2.3 ruled that out.
> disadvantage of metakit is that it's not pure python, meaning possible
> integration problems.  the system has to be deployed at 200+ sites
> worldwide on a mix of RHEL 3 and 4 based systems, with some Debian
> clusters thrown in, and running real production ...
> 
> hence my desire to find a pure-python solution if at all possible.
> it's looking grim.

The following may speed up things a bit if you have enough memory and your
data is queried more often than changed.

import sys

def generate_id():
    for i in xrange(sys.maxint):
        yield i
    raise ImplementationRestriction

get_id = generate_id().next

class Record(dict):
    def __init__(self, *args, **kw):
        dict.__init__(self, *args, **kw)
        assert not hasattr(self, "_id")
        self._id = get_id()
    def __setitem__(self, key, value):
        raise ImmutableException
    def __hash__(self):
        return self._id
    def __str__(self):
        items = self.items()
        items.sort()
        return ", ".join(["%s: %s" % p for p in items])

records = dict.fromkeys([
    Record(user="jack", start=42, state="running"),
    Record(user="jack", start=47, state="running"),
    Record(user="jack", start=46, state="queued"),
    Record(user="jane", start=42, state="running"),
    Record(user="jane", start=7),
])

def fill_cache(records):
    cache = {}
    for record in records:
        for p in record.iteritems():
            cache.setdefault(p, {})[record] = None
    return cache

_cache = fill_cache(records)

def select(*data, **kw):
    [data] = data
    result = data
    for p in kw.iteritems():
        try:
            c = _cache[p]
        except KeyError:
            c = {}
        if not c:
            return {}
        result = dict.fromkeys([k for k in result if k in c])
        if not result:
            break
    return result

if __name__ == "__main__":
    for filter in [
            dict(user="jack"),
            dict(state="running"),
            dict(user="jack", state="running"),
            dict(state="undefined"),
            dict(user="jane", state="queued")
            ]:
        print "--- %s ---" % Record(filter)
        result = select(records, **filter)
        if result:
            for record in result:
                print record
        else:
            print "#no matching records"
        print

The above code runs with Python 2.3; don't know about 2.2 as I don't have it
on my machine. Of course with sets it would look better...

Peter



More information about the Python-list mailing list