How do I get a reference to a KEY value of a dictionary?

Bengt Richter bokr at oz.net
Sat Aug 2 19:32:44 EDT 2003


On 2 Aug 2003 10:16:13 -0400, aahz at pythoncraft.com (Aahz) wrote:

>In article <w1MWa.133$Tz4.17019035 at newssvr21.news.prodigy.com>,
>Andy C <ayc8NOSPAM at cornell.edu> wrote:
>>
>>I don't see how I can do this and let me eliminate duplicates.  I need
>>to assign the old duplicate string to the unique string that already
>>exists.  Hence the question, how do I get a reference to the KEY value?
>>I know I can use keys() and do a linear search, but that is much more
>>inefficient.  I would like to get a reference to the key value in the
>>same time that it takes to do a hash lookup (constant time).
>
>Ahhhh....  Right.  Hmmmmm....  <thinks hard>  You're correct, you do
>need to set the value to the key.  I think using a dict is better than
>using intern().  Here's an optimization:
>
>    node2 = node_cache.setdefault(node2, node2)

I would sure be more comfortable with a change in nomenclature, e.g.,

     node2name = node_name_cache.setdefault(node2name, node2name)

so that it is clear that you are dealing with name strings, not typical graph
vertex/node objects. 

BTW, reading from a file, you can probably not in general avoid forward references,
(i.e., names of adjacent nodes yet to be defined), but they should be resolvable at the end,
so you could make nodes that refer to adjacent nodes directly instead of by name.
Nodes would carry a ref to their own (string) name, and go by that same name string in a graph dict,
so there should be no duplication of strings, just references to them. E.g.,
(I started to just sketch something, and ... well, it's hard to stop programming Python
before something is running at least preliminarily ;-) I subclassed dict to keep nodes in by name.

====< graph.py >=========================================================
def boxit(s):
    lines = s.splitlines()
    wid = max(map(len,lines))
    ret = ['+-%s-+' % ('-'*wid)]
    fmt = '| %%-%ds |'%wid
    for line in lines: ret.append(fmt%line)
    ret.append(ret[0])
    return '\n'.join(ret)

def errep(Ex, *rest):
    if __debug__: print boxit('%s: %s'%(Ex.__name__, rest and rest[0])) # and blunder on
    else: raise Ex(*rest)

class Node(object):
    __slots__ = 'name', 'adj_list'
    def __init__(self, name):
        self.name = name
        self.adj_list = None # signify undefined, vs [] for no adjacent nodes

class Graph(dict):
    def load(self, infile, print_lines=False):
        for line in infile:
            if print_lines: print line.rstrip()
            sline = line.strip()
            if not sline or sline.startswith('#'): continue
            node_def_names = line.split() # assume line like 'nodename adj_node1_name adj_node2_name ...'
            node_name = node_def_names[0]
            if node_name in self:
                # don't allow re-definition
                errep(ValueError, 'Node %r being re-defined:\n    new adj: %r\n    old adj: %r' %(
                    node_name, node_def_names[1:], self[node_name].adj_list))
            else:
                self[node_name] = Node(node_name)
                # set adj list, making making this a defined node
                self[node_name].adj_list =  node_def_names[1:]
        # change adj lists to direct references to adjacent nodes
        for node in self.values():
            adj_list = node.adj_list
            assert adj_list is not None
            for i in xrange(len(adj_list)):
                adj_name = adj_list[i]
                try:
                    adj_list[i] = self[adj_name]
                except KeyError:
                    errep( ValueError, 'node %r refers to undefined adj node %r' % (node.name, adj_name))
                    # continuing if debugging
                    adj_list[i] = Node(adj_name)    # adj_list None (vs []) signifies undefined 
    def show(self):
        node_names = self.keys()
        node_names.sort()
        visited = {}
        ret = []
        def prtree(node, indent=0):
            if node.name in visited:
                if indent: ret.append('%s%s (see previous)'%(indent*'  ',node.name))
            else:
                visited[node.name]=True
                if node.adj_list is None:
                    ret.append('%s%s (undefined) '%(indent*'  ',node.name))
                    return
                ret.append('%s%s'%(indent*'  ',node.name))
                for adj in node.adj_list:
                    prtree(adj, indent+1)
        for node_name in node_names:
            prtree(self[node_name])
        ret.append('')
        return '\n'.join(ret)

def test(filepath=''):
    graph = Graph() # name-string -> Node instance
    if not filepath:
        import StringIO
        f_graph = StringIO.StringIO("""
            A B C D
            B E F
            C G H
            G A
            D B
            E
            F
            H
            D E F
            X E Y
        """)
    else:
        f_graph = file(filepath)
    
    print 'Loading graph from %s ...\n--' % (filepath or '(test)')
    graph.load(f_graph, True) # print the lines read
    print '--'
    f_graph.close()
    
    print graph.show()
    
if __name__ == '__main__':
    import sys
    args = sys.argv[1:]
    test(args and args[0] or '')
=========================================================================

Result:

 [16:33] C:\pywk\clp\pp>graph.py
 Loading graph from (test) ...
 --

             A B C D
             B E F
             C G H
             G A
             D B
             E
             F
             H
             D E F
 +----------------------------------------+
 | ValueError: Node 'D' being re-defined: |
 |     new adj: ['E', 'F']                |
 |     old adj: ['B']                     |
 +----------------------------------------+
             X E Y

 +-------------------------------------------------------+
 | ValueError: node 'X' refers to undefined adj node 'Y' |
 +-------------------------------------------------------+
 --
 A
   B
     E
     F
   C
     G
       A (see previous)
     H
   D
     B (see previous)
 X
   E (see previous)
   Y (undefined)

(Not tested beyond what you see. NTBWYS? ;-)

Regards,
Bengt Richter




More information about the Python-list mailing list