Enumerating formatting strings
Michael Spencer
mahs at telcopartners.com
Thu Apr 21 16:47:51 EDT 2005
Steve Holden wrote:
> Michael Spencer wrote:
>
>> Andrew Dalke wrote:
>>
>>> I see you assume that only \w+ can fit inside of a %()
>>> in a format string. The actual Python code allows anything
>>> up to the balanced closed parens.
>>>
>> Gah! I guess that torpedoes the regexp approach, then.
>>
>> Thanks for looking at this
>>
>> Michael
>>
> While Andrew may have found the "fatal flaw" in your scheme, it's worth
> pointing out that it works just fine for my original use case.
>
> regards
> Steve
Thanks. Here's a version that overcomes the 'fatal' flaw.
class StringFormatInfo(object):
    """Describe the conversion specifiers found in a ``%``-style template.

    After construction the instance exposes:

    * ``template``     -- the original format string;
    * ``format_type``  -- ``"MAPPING"`` if at least one specifier carries a
      ``%(name)`` key, otherwise ``"POSITIONAL"``;
    * ``formats``      -- a list with one dict per specifier, holding the
      keys ``name``, ``conversion``, ``width``, ``precision``,
      ``lengthmodifier`` and ``type`` (absent parts are ``None``);
    * ``format_names`` -- for mapping templates a ``{name: type}`` dict
      (a specifier without a key is stored under ``None``); for positional
      templates a tuple listing the expected arguments in order, where the
      entries ``'width'`` and ``'precision'`` stand for ``*`` arguments.

    The constructor raises ValueError when a specifier is incomplete or
    uses an unsupported conversion type, or when the parentheses of a
    mapping key are unbalanced.
    """

    def __init__(self, template):
        self.template = template
        self.parse()

    def tokenizer(self):
        """Yield one dict per conversion specifier in the template.

        Sets ``self.format_type`` as a side effect while iterating, so the
        attribute is only meaningful once the generator is exhausted.
        """
        lexer = TinyLexer(self.template)
        self.format_type = "POSITIONAL"
        while lexer.search("%"):
            if lexer.match("%"):
                # "%%" is a literal percent sign, not a specifier.
                continue
            spec = {}
            name = lexer.takeparens()
            if name is not None:
                self.format_type = "MAPPING"
            # Always record the key (None when absent) so that parse() can
            # read it for every specifier, even in mixed templates.
            spec['name'] = name
            # Several flags may be combined ("%+-5d") and the space flag is
            # legal too; the match yields None when no flag is present.
            spec['conversion'] = lexer.match(r"[#0\- +]+")
            spec['width'] = lexer.match(r"\d+|\*")
            if lexer.match(r"\."):
                # A lone "." (as in "%.f") leaves the precision as None.
                spec['precision'] = lexer.match(r"\d+|\*")
            else:
                spec['precision'] = None
            spec['lengthmodifier'] = lexer.match("[hlL]")
            ftype = lexer.match("[diouxXeEfFgGcrs]")
            if not ftype:
                raise ValueError(
                    "incomplete or unsupported format specifier at index "
                    "%d of %r" % (lexer.ptr, self.template))
            spec['type'] = ftype
            yield spec

    def parse(self):
        """Tokenize the template and fill ``formats`` and ``format_names``."""
        # The list must be built first: format_type is only final once the
        # tokenizer has run to completion.
        self.formats = formats = list(self.tokenizer())
        if self.format_type == "MAPPING":
            self.format_names = dict(
                (spec['name'], spec['type']) for spec in formats)
        else:
            names = []
            for spec in formats:
                # A "*" width/precision consumes an extra positional
                # argument ahead of the value itself.
                if spec['width'] == '*':
                    names.append('width')
                if spec['precision'] == '*':
                    names.append('precision')
                names.append(spec['type'])
            self.format_names = tuple(names)

    def __mod__(self, values):
        """Format ``values`` with the wrapped template, like ``str % values``."""
        return self.template % values

    def __repr__(self):
        return "%s Template: %s\nArguments: %s" % \
            (self.format_type, self.template, self.format_names)

    __str__ = __repr__


# Short alias for the class.
SFI = StringFormatInfo
def tests():
    """Print the parsed description of three sample templates."""
    # print is called with one parenthesized argument so the statement is
    # valid, and prints the same text, on both Python 2 and Python 3.
    print(SFI('%(arg1)s %% %(arg2).*f %()s %s'))
    print(SFI('%s %*.*d %*s'))
    print(SFI('%(this(is)a.--test!)s'))
import re
class TinyLexer(object):
    """Minimal regex-driven scanner over a string.

    The lexer keeps a cursor (``ptr``) into ``text``; successful matches
    return the matched substring and, by default, move the cursor past it.
    Pattern strings are compiled once per lexer and kept in ``re_cache``.
    """

    def __init__(self, text):
        self.text = text
        self.ptr = 0
        self.len = len(text)
        self.re_cache = {}

    def match(self, regexp, consume=True, anchor=True):
        """Match ``regexp`` against the text starting at the cursor.

        ``regexp`` is either a pattern string or an already compiled
        pattern object. With ``anchor`` true the match must begin exactly
        at the cursor; otherwise the first match at or after the cursor is
        taken. With ``consume`` true the cursor moves to the end of the
        match. Returns the matched text, or None when nothing matched.
        """
        if not hasattr(regexp, "match"):
            # A pattern source string: compile it once and reuse it.
            # (Duck-typed so the check works on Python 2 and 3 alike.)
            cache = self.re_cache
            if regexp not in cache:
                cache[regexp] = re.compile(regexp)
            regexp = cache[regexp]
        if anchor:
            matcher = regexp.match
        else:
            matcher = regexp.search
        found = matcher(self.text, self.ptr)
        if found is None:
            return None
        if consume:
            self.ptr = found.end()
        return found.group()

    def search(self, regexp, consume=True):
        """Like match(), but unanchored: find the next occurrence."""
        return self.match(regexp, consume=consume, anchor=False)

    def takeparens(self):
        """Consume a balanced ``(...)`` group that starts at the cursor.

        Returns the text between the outermost parentheses (nested
        parentheses are kept verbatim) and moves the cursor past the
        closing one. Returns None, without moving the cursor, when the
        cursor is at the end of the text or not on an opening parenthesis.
        Raises ValueError when the group is never closed; the cursor is
        then left at the end of the text.
        """
        start = self.ptr
        if start >= self.len or self.text[start] != '(':
            return None
        depth = 1
        pos = start + 1
        while pos < self.len:
            char = self.text[pos]
            pos += 1
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    self.ptr = pos
                    # Slice out the content, excluding both delimiters.
                    return self.text[start + 1:pos - 1]
        self.ptr = pos
        raise ValueError("Unmatched parentheses")
More information about the Python-list mailing list