Enumerating formatting strings
Bengt Richter
bokr at oz.net
Thu Apr 21 13:26:40 EDT 2005
On Wed, 20 Apr 2005 17:09:16 -0700, Michael Spencer <mahs at telcopartners.com> wrote:
>Andrew Dalke wrote:
>
>> I see you assume that only \w+ can fit inside of a %()
>> in a format string. The actual Python code allows anything
>> up to the balanced closed parens.
>>
>Gah! I guess that torpedoes the regexp approach, then.
>
>Thanks for looking at this
>
I brute-forced a str subclass that will call a mapping object's __getitem__ for both
kinds of format spec and '*' specs. Just to see what it would take. I didn't go the whole
way looking for a __format__ method on the mapping object, along the lines I suggested in
a previous post. Someone else's turn again ;-)
This has not been tested thoroughly...
The approach is to scan the original format string and put pieces into an out list
and then ''.join that for final output. The pieces are the non-format parts and
the strings produced by doing the formatting as formats are found. %(name) format args are
retrieved from the mapping object by name as usual, and saved as the arg for
rewritten plain format made from the tail after %(name), which is the same tail
as %tail, except that the value is already retrieved. Next '*' or decimal strings
are packed into the rewritten format, etc. The '*' values are retrieved by integer
values passed to mapobj[i] and incremented each time. If the arg value was not
retrieved by name, that's another mapobj[i]. Then the conversion is done with
the plain format. The tests have MixFmt(fmt, verbose=True) % MapObj(position_params, namedict)
and the verbose prints each rewritten format and arg and result as it appends them to out.
----< mixfmt.py >------------------------------------------------------------------------
# mixfmt.py -- a string subclass with __mod__ permitting mixed '%(name)s %s' formatting
import re
class MixFmtError(Exception):
    """Raised by MixFmt when a format spec in the string cannot be parsed."""


class MixFmt(str):
    """
    A str subclass whose '%' operator accepts mixed '%(name)s %s' formats.

    Construct with MixFmt(fmt) or MixFmt(fmt, verbose=True); with a true
    verbose option every rewritten format, its argument and the result are
    printed as they are produced.
    """

    # Characters accepted at each stage of a conversion spec.
    _FLAGS = '#0-+ '
    _DIGITS = '0123456789'
    _LENGTH_MODS = 'hlL'
    _CONVERSIONS = 'diouxXeEfFgGcrs'
    # Marks "no value fetched by name yet"; None would be a legal value.
    _NO_ARG = object()

    def __new__(cls, s, **kw):
        # Keyword options are consumed by __init__; str only gets the text.
        return str.__new__(cls, s)

    def __init__(self, *a, **kw):
        self._verbose = kw.get('verbose')

    # Michael Spencer's regex, slightly modded, but only for reference, since XXX note.
    # Kept in step with the scanner in __mod__: any number of flags (including
    # the space flag) and an empty precision ('%.f') are accepted.
    parse_format = re.compile(r'''
        (
        \% # placeholder
        (?:\(\w*\))? # 0 or 1 "named" groups XXX "%( (any)(balanced) parens )s" is legal!
        [\#0\-\+\ ]* # any number of conversion flags
        (?:\* | \d+)? # optional minimum conversion width
        (?:\.\* | \.\d*)? # optional precision
        [hlL]? # optional length modifier
        [diouxXeEfFgGcrs] # conversion type - note %% omitted
        )
        ''',
        re.VERBOSE)

    def _close_paren(self, open_pos):
        """
        Return the index of the ')' that balances the '(' at open_pos.

        Raises MixFmtError if the parentheses never balance.
        """
        depth = 0
        for i in range(open_pos, len(self)):
            ch = self[i]
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    return i
        raise MixFmtError('no match for "(" at %s' % open_pos)

    def _scan_count(self, pos, mapobj, iarg):
        """
        Parse an optional width/precision count starting at pos.

        A '*' is replaced by str(mapobj[iarg]) and consumes one positional
        index; a run of decimal digits is copied as is; anything else yields
        an empty count.  Returns (text, new_pos, new_iarg).
        """
        end = len(self)
        if pos < end and self[pos] == '*':
            return str(mapobj[iarg]), pos + 1, iarg + 1
        stop = pos
        while stop < end and self[stop] in self._DIGITS:
            stop += 1
        return self[pos:stop], stop, iarg

    def __mod__(self, mapobj):
        """
        The '%' MixFmt string operation allowing both %(whatever)fmt and %fmt
        by calling mapobj[whatever] for named args, and mapobj[i] sequentially
        counting i for each '*' width or precision spec, and unnamed args.
        It is up to the mapobj to handle this. See MapObj example used in tests.

        Returns the formatted str.  Raises MixFmtError for an unbalanced
        '%(' key, an unknown conversion type, or a spec cut off by the end
        of the string.
        """
        out = []
        iarg = 0
        pos, end = 0, len(self)
        while pos < end:
            last = pos
            pos = self.find('%', pos)
            # step over literal '%%' pairs until a real spec (or nothing) is found
            while pos >= 0 and self[pos:pos + 2] == '%%':
                pos = self.find('%', pos + 2)
            if pos < 0:
                out.append(self[last:].replace('%%', '%'))
                break
            # here we have start of fmt with % at pos
            out.append(self[last:pos].replace('%%', '%'))
            pos += 1
            plain_arg = self._NO_ARG
            if pos < end and self[pos] == '(':
                # named arg: the key is everything inside the balanced parens
                close = self._close_paren(pos)
                plain_arg = mapobj[self[pos + 1:close]]
                pos = close + 1
            # Rebuild a plain (unnamed, '*'-free) format from the rest of the spec.
            plain_fmt = '%'
            # conversion flags, any number of them
            while pos < end and self[pos] in self._FLAGS:
                plain_fmt += self[pos]
                pos += 1
            # optional minimum conversion width
            text, pos, iarg = self._scan_count(pos, mapobj, iarg)
            plain_fmt += text
            # optional precision
            if pos < end and self[pos] == '.':
                text, pos, iarg = self._scan_count(pos + 1, mapobj, iarg)
                plain_fmt += '.' + text
            # optional length modifier
            if pos < end and self[pos] in self._LENGTH_MODS:
                plain_fmt += self[pos]
                pos += 1
            # conversion type - note %% was handled above
            if pos < end and self[pos] in self._CONVERSIONS:
                plain_fmt += self[pos]
                pos += 1
            else:
                # the slice is '' at end of string, where indexing would raise IndexError
                raise MixFmtError('Bad conversion type %r at %s' % (self[pos:pos + 1], pos))
            if plain_arg is self._NO_ARG:  # need arg
                plain_arg = mapobj[iarg]
                iarg += 1
            result = plain_fmt % (plain_arg,)
            if self._verbose:
                # single-argument print(...) behaves the same on Python 2 and 3
                print(' -> %r %% %r => %r' % (plain_fmt, (plain_arg,), result))
            out.append(result)
        return ''.join(out)
class MapObj(object):
    """
    Example for test.
    Handles both named and positional (integer) keys
    for MixFmt(fmtstring) % MapObj(posargs, namedict)
    """

    def __init__(self, *args, **kw):
        self.args = args  # positional values, looked up by integer key
        self.kw = kw      # named values, looked up by any other key

    def __getitem__(self, i):
        """
        Return args[i] for an int key, else kw[i].

        A missing named key does not raise: a '<KeyError:...>' placeholder
        string is returned so the demo output shows what was asked for.
        An out-of-range integer key raises IndexError from the tuple lookup.
        """
        if isinstance(i, int):
            return self.args[i]
        try:
            return self.kw[i]
        except KeyError:
            # wrap in a 1-tuple so a tuple-valued key is shown, not unpacked by %
            return '<KeyError:%r>' % (i,)
def test(fmt, *args, **namedict):
    """
    Print fmt and its arguments, then the verbose MixFmt trace and result.

    args become the positional values and namedict the named values of the
    MapObj that fmt is applied to.
    """
    # single-argument print(...) produces the same output on Python 2 and 3
    print('\n==== test with:\n %r\n %s\n %s' % (fmt, args, namedict))
    print(MixFmt(fmt, verbose=True) % MapObj(*args, **namedict))
def testseq():
    """Run the fixed sequence of demonstration formats through test()."""
    test('(no %%)')
    test('%s', 'first')
    test('%(sym)s', sym='second')
    test('%s %*.*d %*s', 'third -- expect " 012 ab" after colon:', 5, 3, 12, 4, 'ab')
    # The names go through a dict because the empty key '' cannot be
    # written as a keyword argument.
    test('%(arg1)s %% %(arg2).*f %()s %s', 3, 'last', **{
        'arg1': 'fourth -- expect " % 2.220 NULL? last" after colon:',
        'arg2': 2.22,
        '': 'NULL?'})
    #'%s %*.*d %*s', *['expect " 345 ab"??:', 2, 1, 12345, 4, 'ab'])
    test('fifth -- non-key name: %(this(is)a.--test!)s')
if __name__ == '__main__':
    # Command line driver:
    #   mixfmt.py -test                  run the built-in demonstration cases
    #   mixfmt.py fmt [args...]          format fmt with the given arguments
    # Each argument is a string, or '-i num' / '-f num' for an int / float.
    # 'key = value' moves the preceding argument out of the positional list
    # and uses it as the name for the following value.
    import sys
    usage = 'Usage: python24 mixfmt.py -test | fmt ([key =] (s | (-i|-f) num)+ )*'
    if not sys.argv[1:]:
        raise SystemExit(usage)
    fmt = sys.argv[1]
    if fmt == '-test':
        testseq()
        raise SystemExit
    args = []
    namedict = {}
    to_name_dict = False
    convert = None  # int or float while a '-i' / '-f' is waiting for its number
    for arg in sys.argv[2:]:
        if convert is not None:
            try:
                arg = convert(arg)
            except ValueError:
                raise SystemExit(usage)
            convert = None
        elif arg == '-i':
            convert = int
            continue
        elif arg == '-f':
            convert = float
            continue
        elif arg == '=':
            # the key is the previous positional argument and must be a string
            if not args or not isinstance(args[-1], str):
                raise SystemExit(usage)
            to_name_dict = True
            continue
        if to_name_dict:
            namedict[args.pop()] = arg
            to_name_dict = False
        else:
            args.append(arg)
    if convert is not None:
        # a trailing '-i' / '-f' with no number after it
        raise SystemExit(usage)
    test(fmt, *args, **namedict)
-----------------------------------------------------------------------------------------
Result of py24 mixfmt.py -test:
[10:06] C:\pywk\pymods>py24 mixfmt.py -test
==== test with:
'(no %%)'
()
{}
(no %)
==== test with:
'%s'
('first',)
{}
-> '%s' % ('first',) => 'first'
first
==== test with:
'%(sym)s'
()
{'sym': 'second'}
-> '%s' % ('second',) => 'second'
second
==== test with:
'%s %*.*d %*s'
('third -- expect " 012 ab" after colon:', 5, 3, 12, 4, 'ab')
{}
-> '%s' % ('third -- expect " 012 ab" after colon:',) => 'third -- expect " 012 ab"
after colon:'
-> '%5.3d' % (12,) => ' 012'
-> '%4s' % ('ab',) => ' ab'
third -- expect " 012 ab" after colon: 012 ab
==== test with:
'%(arg1)s %% %(arg2).*f %()s %s'
(3, 'last')
{'': 'NULL?', 'arg1': 'fourth -- expect " % 2.220 NULL? last" after colon:', 'arg2': 2.2200000
000000002}
-> '%s' % ('fourth -- expect " % 2.220 NULL? last" after colon:',) => 'fourth -- expect " %
2.220 NULL? last" after colon:'
-> '%.3f' % (2.2200000000000002,) => '2.220'
-> '%s' % ('NULL?',) => 'NULL?'
-> '%s' % ('last',) => 'last'
fourth -- expect " % 2.220 NULL? last" after colon: % 2.220 NULL? last
==== test with:
'fifth -- non-key name: %(this(is)a.--test!)s'
()
{}
-> '%s' % ("<KeyError:'this(is)a.--test!'>",) => "<KeyError:'this(is)a.--test!'>"
fifth -- non-key name: <KeyError:'this(is)a.--test!'>
You can also run it interactively with one format and some args, e.g.,
[10:25] C:\pywk\pymods>py24 mixfmt.py
Usage: python24 mixfmt.py -test | fmt ([key =] (s | (-i|-f) num)+ )*
[10:25] C:\pywk\pymods>py24 mixfmt.py "%*.*f %(hi)s" -i 6 -i 3 -f 3.5 hi = hello
==== test with:
'%*.*f %(hi)s'
(6, 3, 3.5)
{'hi': 'hello'}
-> '%6.3f' % (3.5,) => ' 3.500'
-> '%s' % ('hello',) => 'hello'
3.500 hello
Regards,
Bengt Richter
More information about the Python-list
mailing list