Overlap in python

Mensanator mensanator at aol.com
Tue Aug 4 18:47:16 EDT 2009


On Aug 4, 2:57 pm, Gregor Lingl <gregor.li... at aon.at> wrote:
> Jay Bird schrieb:
>
>
>
>
>
> > Hi everyone,
>
> > I've been trying to figure out a simple algorithm on how to combine a
> > list of parts that have 1D locations that overlap into a non-
> > overlapping list.  For example, here would be my input:
>
> > part name   location
> > a                  5-9
> > b                  7-10
> > c                  3-6
> > d                  15-20
> > e                  18-23
>
> > And here is what I need for an output:
> > part name   location
> > c.a.b            3-10
> > d.e               15-23
>
> Suppose you have your data given as a dictionary:
>
> data = {'a':(5,9),
>          'b':(7,10),
>          'c':(3,6),
>          'd':(15,20),
>          'e':(18,23)}
>
> Then the following not particularly elegant
> but very simple and straight forward
> algorithm will solve your problem:
>
> def values(key):
>      return set(range(data[key][0], data[key][1]+1))
>
> keys = list(data.keys())
> result = []
> while keys:
>      k = [keys[0]]
>      keys = keys[1:]
>      v = values(k[0])
>      for l in keys[:]:
>          if v.intersection(values(l)):
>              v.update(values(l))
>              k.append(l)
>              keys.remove(l)
>      result.append((k,v))
>
> # print(result) ## if you like
>
> for k, v in result:
>      print(".".join(sorted(k)), "%d-%d" % (min(v), max(v)))
>
> Regards,
> Gregor
>
>
>
> > I've tried various methods, which all fail.  Does anyone have an idea
> > how to do this?
>
> > Thank you very much!
> > Jay


I was thinking sets, too.

import random

def overlap():
  merge_count = 0
  datam.append(data.pop(0))
  while len(data)>0:
    d0 = data.pop(0)
    dmi = 0
    dmtemp = datam[:]
    while d0:
      dmtest = d0[1].intersection(dmtemp[dmi][1])
      #print(d0,dmtemp[dmi],dmtest)
      if len(dmtest)>0: # overlap
        dmtest = d0[1].union(dmtemp[dmi][1])
        datam[dmi] = (d0[0]+dmtemp[dmi][0],dmtest)
        d0 = None
        dmi = 0
        merge_count += 1
      else:
        dmi += 1
        if dmi == len(dmtemp):
          datam.append(d0)
          d0 = None
  return merge_count # repeat until 0 returned

## OP data
##data=[]
##data.append(('a',set([i for i in range(5,9+1)])))
##data.append(('b',set([i for i in range(7,10+1)])))
##data.append(('c',set([i for i in range(3,6+1)])))
##data.append(('d',set([i for i in range(15,20+1)])))
##data.append(('e',set([i for i in range(18,23+1)])))
##datam = []
##
##print()
##for j in data: print(j)
##print()
##
##
##overlap()
##
##for i in datam:
##  print(i[0],min(i[1]),max(i[1]))
##
##  ('a', {8, 9, 5, 6, 7})
##  ('b', {8, 9, 10, 7})
##  ('c', {3, 4, 5, 6})
##  ('d', {15, 16, 17, 18, 19, 20})
##  ('e', {18, 19, 20, 21, 22, 23})
##
##  cba 3 10
##  ed 15 23



## special case - repeat overlap test until no merges

data = [('A', {79, 80, 81, 82, 83, 84, 85, 86}),
('B', {96, 89, 90, 91, 92, 93, 94, 95}),
('C', {21, 22, 23, 24, 25, 26, 27, 28}),
('D', {64, 65, 66, 67, 68, 69, 62, 63}),
('E', {72, 73, 74, 75, 76, 77, 78, 79}),
('F', {65, 66, 67, 68, 69, 70, 71, 72}),
('G', {11, 12, 13, 14, 15, 16, 17, 18}),
('H', {24, 25, 26, 27, 28, 29, 30, 31}),
('I', {32, 33, 34, 35, 36, 37, 38, 31}),
('J', {81, 82, 83, 84, 85, 86, 87, 88})]
datam = []

##  ('A', {55, 56, 57, 58, 59, 60, 61, 62})
##  ('B', {64, 57, 58, 59, 60, 61, 62, 63})
##  ('C', {10, 11, 12, 13, 14, 15, 16, 17})
##  ('D', {32, 25, 26, 27, 28, 29, 30, 31})
##  ('E', {54, 55, 56, 57, 58, 59, 60, 61})
##  ('F', {64, 65, 66, 59, 60, 61, 62, 63})
##  ('G', {41, 42, 43, 44, 45, 46, 47, 48})
##  ('H', {67, 68, 69, 70, 71, 72, 73, 74})
##  ('I', {55, 56, 57, 58, 59, 60, 61, 62})
##  ('J', {64, 65, 66, 67, 68, 69, 62, 63})
##
##  JIFEBA 54 69
##  C 10 17
##  D 25 32
##  G 41 48
##  H 67 74   <--- NFG overlaps JIFEBA

print()
for j in data: print(j)
print()

merges = 1             # corrects above
while merges > 0:
  merges = overlap()
  print(merges)
  if merges>0:
    data = datam[:]
    datam = []

for i in datam:
  print(i[0],min(i[1]),max(i[1]))

##  corrected
##
##  ('A', {79, 80, 81, 82, 83, 84, 85, 86})
##  ('B', {96, 89, 90, 91, 92, 93, 94, 95})
##  ('C', {21, 22, 23, 24, 25, 26, 27, 28})
##  ('D', {64, 65, 66, 67, 68, 69, 62, 63})
##  ('E', {72, 73, 74, 75, 76, 77, 78, 79})
##  ('F', {65, 66, 67, 68, 69, 70, 71, 72})
##  ('G', {11, 12, 13, 14, 15, 16, 17, 18})
##  ('H', {24, 25, 26, 27, 28, 29, 30, 31})
##  ('I', {32, 33, 34, 35, 36, 37, 38, 31})
##  ('J', {81, 82, 83, 84, 85, 86, 87, 88})
##
##  5
##  1
##  0
##  DJFEA 62 88
##  B 89 96
##  IHC 21 38
##  G 11 18


# random sequences
data = []
for j in range(12):
  q = random.randint(0,90)
  r = q+12
  data.append((chr(j+65),set([x for x in range(q,r)])))
datam = []

print()
for j in data: print(j)
print()

merges = 1             # corrects above
while merges > 0:
  merges = overlap()
  print(merges)
  if merges>0:
    data = datam[:]
    datam = []

for i in datam:
  print(i[0],min(i[1]),max(i[1]))

##  ('A', {3, 4, 5, 6, 7, 8, 9, 10})
##  ('B', {76, 77, 78, 79, 80, 81, 82, 83})
##  ('C', {43, 44, 45, 46, 47, 48, 49, 50})
##  ('D', {42, 43, 44, 45, 46, 47, 48, 49})
##  ('E', {23, 24, 25, 26, 27, 28, 29, 30})
##  ('F', {49, 50, 51, 52, 53, 54, 55, 56})
##  ('G', {76, 77, 78, 79, 80, 81, 82, 83})
##  ('H', {1, 2, 3, 4, 5, 6, 7, 8})
##  ('I', {37, 38, 39, 40, 41, 42, 43, 44})
##  ('J', {83, 84, 85, 86, 87, 88, 89, 90})
##
##  6
##  0
##  HA 1 10
##  JGB 76 90
##  IFDC 37 56
##  E 23 30

##  ('A', {64, 65, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63})
##  ('B', {34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45})
##  ('C', {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27})
##  ('D', {70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81})
##  ('E', {64, 65, 66, 67, 56, 57, 58, 59, 60, 61, 62, 63})
##  ('F', {34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45})
##  ('G', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
##  ('H', {64, 65, 66, 55, 56, 57, 58, 59, 60, 61, 62, 63})
##  ('I', {44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55})
##  ('J', {64, 65, 66, 67, 68, 57, 58, 59, 60, 61, 62, 63})
##  ('K', {96, 97, 98, 99, 88, 89, 90, 91, 92, 93, 94, 95})
##  ('L', {96, 97, 98, 99, 100, 89, 90, 91, 92, 93, 94, 95})
##
##  6
##  1
##  0
##  FBJIHEA 34 68
##  C 16 27
##  D 70 81
##  G 0 11
##  LK 88 100





More information about the Python-list mailing list