Grouping pairs - suggested tools

Arnaud Delobelle arnodel at gmail.com
Tue Sep 21 15:34:17 EDT 2010


On 21 Sep, 11:13, Peter Otten <__pete... at web.de> wrote:
[...]
>
> A straightforward implementation:
>
> $ cat group_edges.py
> def find_groups(edges):
>     lookup = {} # node --> group
>     groups = {} # id(group) --> group
>     for a, b in edges:              
>         if a in lookup:              
>             if b in lookup:          
>                 ga = lookup[a]      
>                 gb = lookup[b]      
>                 if ga is not gb:    
>                     # merge groups  
>                     if len(ga) < len(gb):
>                         # always update the smaller group
>                         ga, gb = gb, ga                  
>                     del groups[id(gb)]                  
>                     ga.update(gb)                        
>                     for k in gb:                        
>                         lookup[k] = ga                  
>             else:                                        
>                 # add b to a's group                    
>                 ga = lookup[a]
>                 ga.add(b)
>                 lookup[b] = ga
>         elif b in lookup:
>             # add a to b's group
>             gb = lookup[b]
>             gb.add(a)
>             lookup[a] = gb
>         else:
>             # create new group
>             g = set((a, b))
>             groups[id(g)] = g
>             lookup[a] = lookup[b] = g
>     return groups.values()
>
> def show_groups(edges):
>     groups = sorted(sorted(g) for g in find_groups(edges))
>     for i, group in enumerate(groups, 1):
>         for node in sorted(group):
>             print i, node
>

I think I would go for the two-step approach of constructing the graph
first and then recursively building connected components.  It sounds
more complicated at first but when you implement it it turns out quite
simple:

from collections import defaultdict
from itertools import count

def build_groups(edges):
    neighbors = defaultdict(set)
    for x, y in edges:
        neighbors[x].add(y)
        neighbors[y].add(x)

    groups, group_indices = {}, count(1)
    def set_group(x, group_index):
        groups[x] = group_index
        for y in neighbors[x]:
            if y not in groups:
                set_group(y, group_index)

    for x in neighbors:
        if x not in groups:
            set_group(x, group_indices.next())
    return groups

if __name__ == "__main__":
    def print_groups(edges):
        print " ".join(edges)
        groups = build_groups(edges)
        for x, i in sorted(groups.items()):
            print i, x
        print
    examples = [
        ['ab', 'bc', 'ad', 'ef', 'gf', 'hi'],
        ['ac', 'bd', 'cd'],
        ]
    for edges in examples:
        print_groups(edges)


$ python connected.py
ab bc ad ef gf hi
1 a
1 b
1 c
1 d
2 e
2 f
2 g
3 h
3 i

ac bd cd
1 a
1 b
1 c
1 d

--
Arnaud



More information about the Python-list mailing list