Regular Expression - old regex module vs. re module

Jim Segrave jes at nl.demon.net
Fri Jun 30 06:45:51 EDT 2006


In article <1151607229.548737.145800 at d56g2000cwd.googlegroups.com>,
Steve <stever at cruzio.com> wrote:
>Hi All,
>
>I'm having a tough time converting the following regex.compile patterns
>into the new re.compile format.  There are also differences between
>regsub.sub() and re.sub().
>
>Could anyone lend a hand?
>
>
>import regsub
>import regex
>
>import re    # << need conversion to this module
>
>....
>
>    """Convert perl style format symbology to printf tokens.
>
>    Take a string and substitute computed printf tokens for perl style
>    format symbology.
>
>    For example:
>
>        ###.##    yields %6.2f
>        ########  yields %8d
>        <<<<<     yields %-5s
>    """

Perhaps not optimal, but this processes things as requested. Note that
all float patterns have to be replaced before any integer patterns,
because the trailing '#' of a float such as ###.## would otherwise be
matched as an integer.

==========================
#!/usr/local/bin/python

import re

"""Convert perl style format symbology to printf tokens.
Take a string and substitute computed printf tokens for perl style
format symbology.

For example:

###.##    yields %6.2f
########  yields %8d
<<<<<     yields %-5s
"""


# Compiled patterns for the perl-style format symbols.  Every pattern starts
# with a negative lookbehind so that a symbol directly preceded by a
# backslash is not taken as the start of a match.

# Float picture: either '#'s, a '.', and optional '#'s, or a '.' followed by
# '#'s -- this handles cases where there's no integer or no fractional chars.
# Group 1 is the whole picture; the fractional '#'s are group 2 for the
# first form and group 3 for the second.
floatPattern = re.compile(r'(?<!\\)(#+\.(#*)|\.(#+))')
# Integer picture: a run of '#' (group 1) that is not preceded by a backslash
# or '.', and not followed by '.' or '#'.
integerPattern = re.compile(r'(?<![\\.])(#+)(?![.#])')
# Left-justified string picture: a run of '<' (group 1).
leftJustifiedStringPattern = re.compile(r'(?<!\\)(<+)')
# Right-justified string picture: a run of '>' (group 1).
rightJustifiedStringPattern = re.compile(r'(?<!\\)(>+)')

def float_sub(matchobj):
    """Return the printf float token for one float-picture match.

    ``matchobj`` is expected to carry the whole picture in group 1 and the
    fractional '#' characters in group 2 or, when group 2 did not
    participate in the match, in group 3.

    The token is ``%W.Pf`` where W is the length of the whole picture
    (including the '.') and P is the number of fractional '#' characters.
    """
    width = len(matchobj.group(1))
    fraction = matchobj.group(2)
    if fraction is None:
        # the picture had no integer part, so the fraction is in group 3
        fraction = matchobj.group(3)
    return "%%%d.%df" % (width, len(fraction))
    

def unperl_format(s):
    """Convert perl style format symbology in ``s`` to printf tokens.

    Each run of '<' matched by leftJustifiedStringPattern becomes ``%-Ns``,
    each run of '>' matched by rightJustifiedStringPattern becomes ``%Ns``,
    each float picture matched by floatPattern becomes ``%W.Pf`` (see
    float_sub) and each run of '#' matched by integerPattern becomes
    ``%Nd``, where N is the length of the run that was replaced.

    Returns the converted string; a string containing no format symbols
    is returned unchanged.
    """
    def width_token(template):
        # Build a substitution callback that fills ``template`` with the
        # length of the matched run (group 1).
        def replace(matchobj):
            return template % len(matchobj.group(1))
        return replace

    # One pass per pattern replaces every match.  None of the replacement
    # tokens contains '#', '<', '>' or a backslash, so a replacement can
    # never create or suppress a later match and no rescanning is needed.
    s = leftJustifiedStringPattern.sub(width_token("%%-%ds"), s)
    s = rightJustifiedStringPattern.sub(width_token("%%%ds"), s)

    # must do all floats before ints: the trailing '#' of a float picture
    # such as ###.## would otherwise be matched by integerPattern
    s = floatPattern.sub(float_sub, s)
    s = integerPattern.sub(width_token("%%%dd"), s)
    return s

if __name__ == '__main__':
    # Demo driver: run unperl_format over sample strings that exercise
    # integer, float, left/right justified string and backslash-escaped
    # pictures, printing each input followed by its conversion.
    testarray = [
        "integer: ####, integer # integer at end #",
        "float ####.## no decimals ###. no int .### at end ###.",
        "Left string  <<<<<< short left string  <",
        "right string >>>>>> short right string >",
        "escaped chars \\#### \\####.## \\<\\<<<< \\>\\><<<",
    ]

    for s in testarray:
        # The call form of print with a single argument behaves the same
        # on Python 2 and Python 3.
        print("Testing: %s" % s)
        print("Result: %s" % unperl_format(s))
        print("")
    
======================

Running this gives

Testing: integer: ####, integer # integer at end #
Result: integer: %4d, integer %1d integer at end %1d

Testing: float ####.## no decimals ###. no int .### at end ###.
Result: float %7.2f no decimals %4.0f no int %4.3f at end %4.0f

Testing: Left string  <<<<<< short left string  <
Result: Left string  %-6s short left string  %-1s

Testing: right string >>>>>> short right string >
Result: right string %6s short right string %1s

Testing: escaped chars \#### \####.## \<\<<<< \>\><<<
Result: escaped chars \#%3d \#%6.2f \<\<%-3s \>\>%-3s

    

-- 
Jim Segrave           (jes at jes-2.demon.nl)




More information about the Python-list mailing list