efficient intersection of lists with rounding

Gordon Williams g_will at cyberus.ca
Mon Dec 6 11:30:35 EST 2004



> Hi,
>
> I have to lists that I need to find the common numbers (2nd rounded to
> nearest integral) and I am wondering if there is a more efficient way of
> doing it.
>
> >>> a= [(123,1.3),(123,2.4),(123,7.8),(123,10.2)]
> >>> b= [(123, 0.9), (123, 1.9), (123, 8.0)]
> >>> [ (i,round(j)) for i,j in a for l,m in b if (i,round(j)) ==
> (l,round(m))]
> [(123, 1.0), (123, 2.0), (123, 8.0)]

Thanks for all your suggestions.  I've tried each one with lists of 1K, 10K
and 30K long and tabulated the results below.  Run with profile on W2K,
python 2.3.2, 1GHz Athlon.

1K, 10K and 30K long (seconds per call)
t1= 0.009, 0.148, 0.563
t2= 0.015, 0.217, 0.777
t3= 0.008, 0.108, 0.487
t4= 0.016, 0.190, 0.749
t5= 0.015, 0.224, 0.773

The non-set algorithims (t1,t3) came out the winners (maybe due to the
conversion of the set to a sorted list.  I didn't look into it any farther.)

Regards,

Gordon Williams


--------

from sets import Set
import random


size = 1000

a = [(123,i+random.choice([-.2,-.1,.1,.2])) for i in range(size)]
b = [(123, 1+i+random.choice([-.2,-.1,.1,.2])) for i in range(size)]

def t1():
    #Diez B. Roggisch <deetsNOSPAM at web.de>
    ra =  [ (i,round(j)) for i,j in a]
    rb =  [ (i,round(j)) for i,j in b]


    res = []
    pos_b = 0

    try:
        for i, pivot in ra:
            while rb[pos_b][1] < pivot:
                pos_b += 1
            while rb[pos_b][1] == pivot:
                res.append(rb[pos_b])
                pos_b += 1
    except IndexError:
        # If b gets exhausted somewhere
        pass
    return res


def t2():
    #Steven Bethard <steven.bethard at gmail.com>
    def roundedj(pairs_iterable):
        return [(i, round(j)) for i, j in pairs_iterable]

    l=list(Set(roundedj(a)).intersection(Set(roundedj(b))))
    l.sort()
    return l


def t3():
    #Greg Ewing <greg at cosc.canterbury.ac.nz>
    d = {}
    for (l, m) in b:
       d[l, round(m)] = 1

    result = []
    for (i, j) in a:
       t = (i, round(j))
       if t in d:
         result.append(t)
    return result


def t4():
    #Adam DePrince <adam at cognitcorp.com>
    set_a = Set( [(i,round(j)) for i,j in a] )
    set_b = Set( [(i,round(j)) for i,j in b] )
    l= list(set_a.intersection( set_b ))
    l.sort()
    return l

def t5():
    #Raymond Hettinger <vze4rx4y at verizon.net>
    l= list(Set([(x,round(y)) for x,y in a]) & Set([(x,round(y)) for x,y in
b]))
    l.sort()
    return l

def test():
    r1= t1()
    r2= t2()
    r3= t3()
    r4= t4()
    r5= t5()





More information about the Python-list mailing list