Shell pattern to regular expression code

Stephen Kennedy steve9000 at users.sf.net
Wed Aug 6 15:12:03 EDT 2003


Hi all, I searched the web for code that handles {} alternation, as in
*.{foo,bar} (fnmatch does not handle this case), and surprisingly didn't
find any. So I wrote some. Here it is:

Stephen

#! /usr/bin/env python
                                                                                                                          
import re
                                                                                                                          
def _translate_fragment(pat):
    """Translate shell pattern PAT to an unanchored regular expression.

    This does the real work for translate(); it is separate so that brace
    alternatives can be translated recursively without having to strip an
    end-of-string anchor from each one afterwards.
    """
    i, n = 0, len(pat)
    parts = []
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            parts.append('.*')
        elif c == '?':
            parts.append('.')
        elif c == '[':
            # A ']' directly after '[' or '[!' is a literal member of the
            # set, not its terminator, so start looking for the closing
            # bracket after it.  This also guarantees the set is non-empty.
            j = i
            if j < n and pat[j] == '!':
                j += 1
            if j < n and pat[j] == ']':
                j += 1
            j = pat.find(']', j)
            if j < 0:
                # No closing bracket: the '[' is an ordinary character.
                parts.append(r'\[')
            else:
                # Backslash has no special meaning in the shell pattern, so
                # double it to keep it literal inside the regex class.
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # Shell negation '!' becomes regex negation '^'.
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # A leading '^' is literal in the shell pattern.
                    stuff = '\\' + stuff
                parts.append('[%s]' % stuff)
        elif c == '{':
            j = pat.find('}', i)
            if j < 0:
                # No closing brace: the '{' is an ordinary character.
                parts.append(r'\{')
            else:
                alternatives = pat[i:j].split(',')
                i = j + 1
                parts.append(
                    '(%s)' % '|'.join(
                        [_translate_fragment(alt) for alt in alternatives]))
        else:
            parts.append(re.escape(c))
    return ''.join(parts)


def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    Supported syntax:

        *        any run of characters
        ?        any single character
        [seq]    any character in seq ("[!seq]" negates; a "]" placed
                 first in seq is taken literally)
        {a,b}    either of the comma-separated alternatives, each of which
                 is itself a pattern

    An unterminated "[" or "{" is matched literally.  Brace groups end at
    the first "}", so they cannot be nested.

    There is no way to quote meta-characters.

    The returned string ends with "$"; it is meant to be used with
    re.match(), which anchors the start of the match.
    """
    return _translate_fragment(pat) + '$'
                                                                                                                          
# Demo driver: read candidate names (one per line) from the file "sourcelist"
# and print those that match a sample shell pattern.
source_file = open("sourcelist")
try:
    lof = source_file.readlines()
finally:
    # Close the handle explicitly instead of leaving it to the garbage
    # collector.
    source_file.close()
pats = ["*.*", "*.[ac]", "*.[ac]*", "*.[!ac]*", "*.[ac]x*", "*.{gif,jpg}", "*.{a*,y*}"]
# Only the last pattern is exercised; widen the slice to try the others.
for pat in pats[-1:]:
    regex_text = translate(pat)
    # Single-argument parenthesised print produces the same output whether
    # print is a statement or a function.
    print("*** %s *** %s" % (pat, regex_text))
    regex = re.compile(regex_text)
    # Show at most the first 20 matching lines, stripped of surrounding
    # whitespace (including the newline kept by readlines()).
    print("\n".join([f.strip() for f in lof if regex.match(f) is not None][:20]))





More information about the Python-list mailing list