grouping array

Thu Sep 29 23:42:40 EDT 2005

pkilambi at gmail.com wrote:
> hi if I have an array
> 
> say x = [[2,2,0,0,1,1],
>          [1,1,0,0,1,1],
>          [1,1,0,0,1,1]]
> I basically want to group regions that are non zero like I want to get
> the coordinates of non zero regions..as (x1,y1,x2,y2)
> [(0,0,2,1),(0,4,2,5)] which show the top left(x1,y1) and bottom
> right(x2,y2) corners of each group.hope i am clear.
> 
> Thanks
> 
How about this:

def getregions(grid):
     """Yield lists of adjancent points, not necessarily rectangular"""
     adj = [(-1,0),(+1,0),(0,-1),(0,+1)] # horizontal and vertical adjacencies
                                         # could add diagonals

     points = set((y,x) for y, row in enumerate(grid)
                            for x, cell in enumerate(row)
                                if cell)

     while points:                # set of (y,x) non-zero points
         region = [points.pop()]  # start a new region with any remaining point
         ptr = 0
         while ptr < len(region):
             y, x = region[ptr]
             adjpoints = set((y + j, x + i) for j, i in adj)
             adjpoints &= points # keep only the non-zero, unseen points
             points -= adjpoints # remove these adjancencies from points
             region.extend(adjpoints) # add them to the region
             ptr += 1
         yield region

def getregioncoords(grid):
     """Get top left and bottom right of *rectangular* regions"""
     regions = getregions(grid)
     return [(reg[0], reg[-1]) for reg in regions if reg.sort() or True]

  >>> x = [[2,2,0,0,1,1],
  ...      [1,1,0,0,1,1],
  ...      [1,1,0,0,1,1]]
  ...
  ...
  >>> getregioncoords(x)
  [((0, 0), (2, 1)), ((0, 4), (2, 5))]
   >>> x = [[1,0,1,0,1]]
  >>> getregioncoords(x)
  [((0, 0), (0, 0)), ((0, 2), (0, 2)), ((0, 4), (0, 4))]
  >>> x = [[random.choice([0,1,2]) for x in range(6)] for y in range(6)]
  >>> pprint.pprint(x)
  [[1, 1, 2, 1, 2, 0],
   [2, 0, 0, 2, 0, 1],
   [1, 2, 2, 0, 2, 0],
   [0, 1, 0, 0, 0, 0],
   [2, 0, 0, 1, 1, 0],
   [2, 2, 2, 0, 1, 0]]
  >>> print "\n".join(str(reg) for reg in getregions(x))
  [(0, 1), (0, 0), (0, 2), (1, 0), (0, 3), (2, 0), (1, 3), (0, 4), (2, 1), (3, 
1), (2, 2)]
  [(5, 4), (4, 4), (4, 3)]
  [(5, 0), (5, 1), (4, 0), (5, 2)]
  [(1, 5)]
  [(2, 4)]
  >>>

Unfortunately, it's rather slow.  This one is much faster, using just one data 
structure

def getregions2(grid):
     """Yield lists of adjancent points, not necessarily rectangular"""
     adj = [(-1,0),(+1,0),(0,-1),(0,+1)] # horizontal and vertical adjacencies
                                         # could add diagonals
     rows = len(grid)
     cols = len(grid[0])
     griddata = []
     for row in grid:
         griddata.extend(row)
     for y in xrange(rows):
         ybase = y * cols
         for x in xrange(cols):
             if griddata[ybase + x]:
                 griddata[ybase + x] = 0
                 region = [(y, x)]
                 append = region.append
                 ptr = 0
                 while ptr < len(region):
                     y1, x1 = region[ptr]
                     for j, i in adj:
                         y2, x2 = y1 + j, x1 + i
                         if y2 < 0 or y2 == rows: continue
                         if x2 < 0 or x2 == cols: continue
                         if griddata[y2 * cols + x2]:
                             append((y2, x2))
                             griddata[y2 * cols + x2] = 0
                     ptr += 1
                 yield region

Michael