[Mailman-Users] Rotating Archives

Alan Batie mailman at alan.batie.org
Wed Jan 14 06:43:04 CET 2004


On Tue, Jan 13, 2004 at 10:25:49PM -0500, Bryan Carbonnell wrote:
> How would I parse the mbox file to remove anything older than XX from 
> it so I can regenerate the archives?

Here's a script I've been using for several years to rotate my
mailboxes (I hope it's not sacrilege to post a Perl script to
a list at python.org ;-) ).

If the mbox argument is a file, the script processes just that file; if it
is a directory, it processes every file in that directory.  Messages older
than the specified number of days are moved from mbox to archive_mbox, and
the newer messages are kept in the original:

/usr/local/bin/agemail Mail.30days Mail.archive 90 > /dev/null
/usr/local/bin/agemail Mail Mail.30days 30 > /dev/null


#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if 0;    # only evaluated when a shell, not perl, is reading this file

#  agemail -- move old messages out of mbox-format mailboxes.
#
#  Usage: agemail mbox/dir archive_mbox/dir days_to_keep
#
#  If the first argument is a file, only that mailbox is processed; if it
#  is a directory, every plain file in it is processed and the archive
#  argument must be a directory too.  Messages whose From_ line is older
#  than days_to_keep days are appended to the archive; newer messages stay
#  in the mailbox.  An archive of /dev/null discards old messages instead.
#
#  NOTE(review): the mailbox is not locked while it is rewritten.  The only
#  protection against concurrent delivery is the size/mtime comparison made
#  after the copy, so run this when the mailbox is quiet.

$| = 1;    # unbuffered stdout so the "\r" progress display updates at once

#  Strictures are confined to this block so that they do not change how the
#  subroutines defined further down in the file are compiled.
MAIN: {
    use strict;
    use warnings;

    if (@ARGV != 3) {
        print STDERR "Usage: $0 mbox/dir archive_mbox/dir days_to_keep\n";
        exit(1);
    }

    my ($folder_dir, $archive_dir, $keep_days) = @ARGV;

    #  If archive_dir == /dev/null, toss messages
    my $toss = ($archive_dir eq "/dev/null") ? 1 : 0;

    #  Require at least one digit and nothing else (an empty string used to
    #  slip through and meant "keep 0 days").
    if ($keep_days !~ /\A[0-9]+\z/) {
        print STDERR "Number of days parameter must be numeric.\n";
        exit(1);
    }
    my $keep_time = $keep_days * 24 * 3600;
    my $cur_time  = time;

    my @folder_list;
    if (-d $folder_dir) {
        unless ($toss || -d $archive_dir) {
            print STDERR "Inconsistent: mbox dir, but archive is not dir\n";
            print STDERR "(mbox='$folder_dir', archive='$archive_dir')\n";
            exit(1);
        }

        my $dir_handle;
        unless (opendir($dir_handle, $folder_dir)) {
            print STDERR "Can't open '$folder_dir': $!\n";
            exit(1);
        }
        #  "." and ".." (and any subdirectories) are skipped by the -f test
        #  at the top of the per-mailbox loop.
        @folder_list = readdir($dir_handle);
        closedir($dir_handle);
    } elsif (-f $folder_dir) {
        unless ($toss || !-e $archive_dir || -f $archive_dir) {
            print STDERR "Inconsistent: mbox file, but archive is not file\n";
            print STDERR "(mbox='$folder_dir', archive='$archive_dir')\n";
            exit(1);
        }

        #  Single-file mode is flagged by an empty $folder_dir.
        @folder_list = ($folder_dir);
        $folder_dir  = "";
    } else {
        #  Previously a missing source silently did nothing and exited 0.
        print STDERR "Mailbox '$folder_dir' is not a file or directory\n";
        exit(1);
    }

    foreach my $mbox (@folder_list) {
        my ($mailbox, $archive, $newbox);
        if ($folder_dir eq "") {
            $mailbox = $mbox;
            $archive = $archive_dir;
            $newbox  = $mbox . ".$$";
        } else {
            $mailbox = $folder_dir . "/" . $mbox;
            $archive = $archive_dir . "/" . $mbox;
            $newbox  = $folder_dir . "/" . $mbox . ".$$";
        }

        if (!-f $mailbox) {
            if ($folder_dir eq "") {
                print STDERR "Mailbox '$mailbox' not a file\n";
            }
            next;
        }

        print "\r$mbox...           ";

        #  Remember mode, size and mtime so we can restore the permissions
        #  and detect delivery that happens while we are copying.
        my @start = stat($mailbox);
        unless (@start) {
            print STDERR "Can't stat $mailbox: $!\n";
            exit(1);
        }
        my ($start_mode, $start_size, $start_mtime) = @start[2, 7, 9];

        #  Three-argument open: file names come from readdir and must never
        #  be interpreted as open modes or pipes.
        my ($mailbox_fh, $newbox_fh, $archive_fh);
        unless (open($mailbox_fh, '<', $mailbox)) {
            print STDERR "Can't read $mailbox: $!\n";
            exit(1);
        }
        unless (open($newbox_fh, '>', $newbox)) {
            print STDERR "Can't write to $newbox: $!\n";
            exit(1);
        }
        #  The new file replaces the mailbox, so give it the same mode
        #  rather than whatever the umask produced.
        unless (chmod($start_mode & 07777, $newbox)) {
            print STDERR "Warning: can't set mode of $newbox: $!\n";
        }
        if (!$toss) {
            unless (open($archive_fh, '>>', $archive)) {
                print STDERR "Can't append to $archive: $!\n";
                exit(1);
            }
        }

        #  In case there's junk in front of the first From...
        my $arch_it = 1;
        #  The line is deliberately left in $_ : older versions of
        #  parse_date read the global $_ rather than their argument.
        while (<$mailbox_fh>) {
            if (/^From /) {
                my $msg_time = parse_date($_);
                #  An undefined result means the line is not a parseable
                #  From_ separator; treat it as part of the current message.
                if (defined $msg_time) {
                    $arch_it = ($cur_time - $msg_time < $keep_time) ? 0 : 1;
                }
            }
            #  "or", not "||": the latter bound to $_ and never saw the
            #  result of print.
            if ($arch_it == 0) {
                print {$newbox_fh} $_ or die "Write to $newbox failed: $!\n";
            } elsif (!$toss) {
                print {$archive_fh} $_ or die "Write to $archive failed: $!\n";
            }
        }

        close($mailbox_fh);
        #  Buffered write errors (e.g. disk full) only surface at close, and
        #  we are about to replace the original, so they must be fatal.
        close($newbox_fh) or die "Write to $newbox failed: $!\n";
        if (!$toss) {
            close($archive_fh) or die "Write to $archive failed: $!\n";
        }

        my @stop = stat($mailbox);
        if (!@stop || $stop[9] > $start_mtime || $stop[7] != $start_size) {
            print STDERR "$mailbox was modified after start of archive ---\n";
            print STDERR "Updated version left in $newbox.\n";
            exit(1);
        }

        if (-s $newbox) {
            #  rename() swaps the file in atomically; the old unlink + link
            #  sequence left a window with no mailbox at all.
            unless (rename($newbox, $mailbox)) {
                print STDERR "Unable to rename $newbox to $mailbox: $!\n";
                exit(1);
            }
        } else {
            #  Nothing was kept: remove the mailbox rather than leave an
            #  empty file behind.
            unless (unlink($mailbox)) {
                print STDERR "Unable to unlink $mailbox: $!\n";
                exit(1);
            }
            unlink($newbox);
        }

        if (!$toss && -z $archive) {
            unless (unlink($archive)) {
                print STDERR "Can't unlink empty archive '$archive': $!\n";
                exit(1);
            }
        }
    }

    print "\n";
    exit(0);
}

#  Process From_ lines to get the number of seconds since Jan. 1, 1970:
#
#  From aahz!batie Fri Aug  3 08:30:41 1990
#
#  Argument: the From_ line (a trailing newline is fine).  For backward
#  compatibility the global $_ is used when no argument is passed.
#
#  Returns the epoch time of the message, interpreting the timestamp in the
#  local timezone with the DST rules in force on the message's own date.
#  A timezone token between the time and the year is accepted but ignored.
#  Returns undef (empty list in list context) when the line does not look
#  like a From_ line or names an impossible date.

sub parse_date {
    use strict;
    use warnings;

    my ($from_line) = @_;
    $from_line = $_ unless defined $from_line;
    return unless defined $from_line;

    #  Core module; loaded here so the sub is self-contained.
    require Time::Local;

    my %month_tbl = (
        Jan => 0, Feb => 1, Mar => 2,  Apr => 3,
        May => 4, Jun => 5, Jul => 6,  Aug => 7,
        Sep => 8, Oct => 9, Nov => 10, Dec => 11,
    );

    my ($month, $day, $hours, $minutes, $seconds, $year) = $from_line =~ m{
        \A From \s+ \S+                             # envelope sender
        \s+ [A-Za-z]{3}                             # day of week (ignored)
        \s+ ([A-Za-z]{3})                           # month name
        \s+ (\d{1,2})                               # day of month
        \s+ (\d{1,2}) : (\d{2}) (?: : (\d{2}) )?    # hh:mm[:ss]
        (?: \s+ (?: [A-Za-z]{2,5} | [-+]\d{4} ) )?  # optional zone (ignored)
        \s+ (\d{4}) \b                              # four-digit year
    }x or return;

    $month = ucfirst(lc($month));
    return unless exists $month_tbl{$month};
    $seconds = 0 unless defined $seconds;

    #  timelocal() croaks on out-of-range fields (e.g. Feb 31); turn that
    #  into an undef result instead of killing the caller.
    my $clock = do {
        local $@;
        eval {
            Time::Local::timelocal($seconds, $minutes, $hours,
                                   $day, $month_tbl{$month}, $year);
        };
    };
    return unless defined $clock;
    return $clock;
}

-- 
Alan Batie                   ______    alan.batie.org                Me
alan at batie.org            \    /    www.qrd.org         The Triangle
PGPFP DE 3C 29 17 C0 49 7A    \  /     www.pgpi.com   The Weird Numbers
27 40 A5 3C 37 4A DA 52 B9     \/      spamassassin.taint.org  NO SPAM!

To announce that there must be no criticism of the President, or that we
are to stand by the President, right or wrong, is not only unpatriotic
and servile, but is morally treasonable to the American public.
-Theodore Roosevelt, 26th US President (1858-1919)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 305 bytes
Desc: not available
Url : http://mail.python.org/pipermail/mailman-users/attachments/20040113/1a6f2e10/attachment.pgp 


More information about the Mailman-Users mailing list