Perl and Python, a practical side-by-side example.

attn.steven.kuo at gmail.com attn.steven.kuo at gmail.com
Sat Mar 3 03:08:57 EST 2007


On Mar 2, 2:44 pm, "Shawn Milo" <S... at Milochik.com> wrote:

(snipped)

> I'm attaching both the Perl and Python versions, and I'm open to
> comments on either. The script reads a file from standard input and
> finds the best record for each unique ID (piid). The best is defined
> as follows: The newest expiration date (field 5) for the record with
> the state (field 1) which matches the desired state (field 6). If
> there is no record matching the desired state, then just take the
> newest expiration date.
>
> Thanks for taking the time to look at these.
>


My attempts:

### Perl ###

#!/usr/bin/perl
use strict;
use warnings;

use List::Util qw/reduce/;
# Zero-based positions of the fields keep_best() inspects in a record.
use constant STATE  => 1;    # state the record actually carries
use constant TARGET => 5;    # state the record is wanted for
use constant DATE   => 6;    # expiration date, compared as a string

# keep_best($best, $current)
#
# Decide whether the reigning record should survive against a challenger.
# Both arguments are array references indexed by STATE, TARGET and DATE.
#
# Returns 1 to keep $best, 0 to replace it with $current.
#
# Ranking rules:
#   * a record whose STATE equals its own TARGET outranks one that does not;
#   * between two records on the same footing, the one with the greater
#     DATE (string comparison) wins, and a tie keeps $best.
sub keep_best {
    my ($best, $current) = @_;

    my $current_hits = $current->[STATE] eq $current->[TARGET];
    my $best_hits    = $best->[STATE] eq $best->[TARGET];

    # Only one side matches its target state: that side wins outright.
    return 0 if $current_hits and not $best_hits;
    return 1 if $best_hits and not $current_hits;

    # Same footing: the challenger must be strictly newer to win.
    return $current->[DATE] gt $best->[DATE] ? 0 : 1;
}


# Records grouped by ID (the first tab-separated field). Each stored row
# is the list of split fields followed by the original chomped line, so
# the winning record can be printed back verbatim via index -1.
my %input;

# while reads one line at a time; a for loop over <> would first build
# a list of every input line.

while (my $line = <>)
{
    chomp $line;
    # A LIMIT of -1 keeps trailing empty fields. Without it, split drops
    # them, the row gets shorter, and the raw line appended below could
    # land on the TARGET or DATE index and be compared as if it were data.
    my @results = split(/\t/, $line, -1);
    my $key = $results[0];
    push @{$input{$key}}, [ @results, $line ];
}

# each walks the hash one pair at a time instead of building a key list.
# Output order follows the hash's internal order, not the input order.

while (my ($key, $aref ) = each %input)
{
    # Fold the group's rows left to right: $a is the best row so far,
    # $b the next candidate; keep_best() says whether $a survives.
    my $best = reduce {
       keep_best( $a, $b ) ? $a : $b
    } @$aref;

    print $best->[-1], "\n";
}


### Python (re-working John's code) ###

import sys

def keep_best(best, current):
    """Tell whether `best` should be kept in preference to `current`.

    Both arguments are records (sequences of fields) indexed by the
    positions defined below.

    A record whose actual state equals its desired state outranks one
    whose states differ.  Between two records on the same footing, the
    one with the greater expiry date wins; a tie keeps `best`.

    Returns True to keep `best`, False to replace it with `current`.
    """

    ACTUAL_STATE = 1
    # John had these swapped
    DESIRED_STATE = 5
    EXPIRY_DATE = 6

    keep = True
    if current[ACTUAL_STATE] == current[DESIRED_STATE]:
        if best[ACTUAL_STATE] == best[DESIRED_STATE]:
            # Both match the desired state: the newer record wins.
            if current[EXPIRY_DATE] > best[EXPIRY_DATE]:
                keep = False
        else:
            # Only the challenger matches: it wins regardless of date.
            keep = False
    else:
        # Neither may match here.  The check must compare best's actual
        # state with its DESIRED state; comparing a field with itself is
        # always False and would never let a newer non-matching record
        # replace an older non-matching one.
        if (best[ACTUAL_STATE] != best[DESIRED_STATE]
                and current[EXPIRY_DATE] > best[EXPIRY_DATE]):
            keep = False
    return keep

def process_file(opened_file=sys.stdin):
    """Print the best record for each unique ID found in `opened_file`.

    `opened_file` is any iterable of tab-separated lines (standard input
    by default).  Lines are grouped by their first field; within each
    group the winner is chosen by folding keep_best() over the rows in
    input order.  The winning line is printed exactly as it was read,
    minus its trailing newline.  Groups are printed in dictionary
    iteration order.
    """

    PIID = 0
    recs = {}

    for line in opened_file:
        line = line.rstrip('\n')
        row = line.split('\t')
        # Keep the untouched line as the last element so it can be
        # printed back verbatim with row[-1].
        row.append(line)
        recs.setdefault(row[PIID], []).append(row)

    for piid in recs:
        rows = recs[piid]
        # Explicit left fold; avoids the builtin reduce(), which only
        # exists on Python 2.
        best = rows[0]
        for current in rows[1:]:
            if not keep_best(best, current):
                best = current
        # Parenthesised single-argument form works as the Python 2 print
        # statement and as the Python 3 print function.
        print(best[-1])

# Process standard input only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    process_file()


# "reduce" seems to be Lispish, Pythonic, and Perlish!

--
Hope this helps,
Steve





More information about the Python-list mailing list