Toy interpolation code [was Re: Draft PEP: string interpolation with backquotes]
Nick Mathewson
QnickQm at alum.mit.edu
Wed Dec 5 23:59:05 EST 2001
One thing I don't understand from the Interpolation Wars is why people
feel Python needs any new syntax for this. Here's some example code
that shows how to implement four kinds of proposed interpolation.
They are:
"naive backticks" (i.e., "`a`" == str(a);
"``a`+"b"`" is an error)
"smart backticks" (i.e., "`a`" == str(a);
"``a`+"b"`" == str(repr(a)+"b"))
"dollar-quoting" (i.e., "a$b$c" == "a"+str(b)+"c")
"perl-style" (i.e., "1+1=$(1+1)" == "1+1=2",
"Major $sys.version_info[0]" == "Major 2",
x=5; "x=$x" == "x=5".)
The interface works in two ways:
1. Immediate
i(string, mode='d2')
(for mode in ('bt1', 'bt2', 'd1', 'd2'))
2. Precompiled
pattern = I(string, mode='d2')
print str(pattern)
print pattern % namespace
BUGS:
1. This code is not tested well enough.
2. It is not optimized either.
3. It is not commented well enough.
4. It doesn't fail gracefully when it sees bad syntax.
============================================================
#!/usr/bin/python
import sys
# Refuse to load on a pre-2.0 interpreter.
assert sys.version_info[:1] >= (2,)
def _getframe_fallback():
    """Return the caller's frame on interpreters without sys._getframe.

    An exception is raised and caught purely to obtain a traceback.  The
    traceback's frame is this function's own frame, so its f_back is the
    frame of whoever called us.
    """
    try:
        raise ZeroDivisionError
    except ZeroDivisionError:
        # sys.exc_info() is used instead of the sys.exc_traceback global,
        # which is not thread-safe and does not exist in Python 3.
        return sys.exc_info()[2].tb_frame.f_back


# If we don't have sys._getframe, use our own.
_getframe = getattr(sys, "_getframe", _getframe_fallback)
def _comp_backtick1(s, ch='`'):
    """Compile a format string with "naive" delimiter pairing.

    Every stretch of text between one ch and the very next ch is taken
    to be a Python expression; everything else is literal text.  The
    returned code object (for use with eval) evaluates to the format
    string with each expression replaced by str() of its value.

    s  -- the format string.
    ch -- the single-character delimiter (a backtick by default).

    Raises ValueError if an opening delimiter has no closing partner.
    Expressions are not checked here beyond the final compile() call,
    so a malformed expression surfaces as a SyntaxError.
    """
    pieces = []
    pos = 0
    end = len(s)
    while 1:
        opener = s.find(ch, pos)
        if opener < 0:
            # No more expressions: the rest is literal text.
            pieces.append(repr(s[pos:]))
            break
        if opener > pos:
            pieces.append(repr(s[pos:opener]))
        closer = s.find(ch, opener + 1)
        if closer < 0:
            raise ValueError("Bad format")
        # The extra parentheses keep a comma-separated expression from
        # being read as several arguments to str().
        pieces.append("str((%s))" % s[opener + 1:closer])
        pos = closer + 1
        if pos >= end:
            break
    if len(pieces) == 1:
        source = pieces[0]
    else:
        source = '"".join( (%s) )' % ", ".join(pieces)
    return compile(source, '<interpolated string>', 'eval')
def _get_btexpr_at(s, i, ch='`', min=None):
    """Find the shortest delimited chunk starting at s[i] that compiles.

    Occurrences of ch are tried in order, starting the search at index
    min (or i+1 when min is None).  For each occurrence at index k the
    candidate s[i:k+1] is compiled as an expression; the first one that
    compiles without a SyntaxError wins.

    Returns (expr, end) where expr is s[i+1:k], i.e. the candidate with
    its first and last characters dropped, and end is k+1, the index
    just past the candidate.

    Raises ValueError("Bad format") when no further ch exists, and
    ValueError("Bad Expression") when the search runs off the end of s
    without any candidate compiling.

    When s[i] is itself a backtick the candidate includes both
    backticks, so it only compiles where backtick-repr is valid syntax
    (Python 2).
    """
    if min is None:
        pos = i + 1
    else:
        pos = min
    while pos < len(s):
        pos = s.find(ch, pos)
        if pos < 0:
            raise ValueError("Bad format")
        try:
            compile(s[i:pos + 1], '', 'eval')
        except SyntaxError:
            # Not a complete expression yet; try the next occurrence.
            pos = pos + 1
        else:
            return s[i + 1:pos], pos + 1
    raise ValueError("Bad Expression")
def _comp_backtick2(s, ch='`', fn=_get_btexpr_at):
    """Compile a format string whose delimited chunks are expressions.

    Like _comp_backtick1, the result is a code object (for eval) that
    evaluates to the format string with every chunk replaced by str()
    of its value.  The difference is how the end of a chunk is found:
    _comp_backtick1 always stops at the very next ch, whereas this
    function asks fn, which can understand Python expressions.  With the
    default fn, in ' `"1"+`2`` ' the whole of "1"+`2` is one chunk,
    where _comp_backtick1 would stop at the second backtick.

    s  -- the format string.
    ch -- the character that introduces a chunk.
    fn -- called as fn(s, index_of_ch); must return a tuple
          (expression_source, index_just_past_the_chunk).  fn is not
          told which ch was used, so the default fn only makes sense
          with the default ch.

    Exceptions raised by fn propagate to the caller.
    """
    pieces = []
    pos = 0
    while 1:
        opener = s.find(ch, pos)
        if opener < 0:
            # No more chunks: the rest is literal text.
            pieces.append(repr(s[pos:]))
            break
        if opener > pos:
            pieces.append(repr(s[pos:opener]))
        expr, pos = fn(s, opener)
        # Always convert to a string: the default fn hands back the bare
        # expression, and "".join() rejects non-string values.  Wrapping
        # a chunk that is already a str() call is harmless.
        pieces.append("str((%s))" % expr)
        if pos >= len(s):
            break
    if len(pieces) == 1:
        source = pieces[0]
    else:
        source = '"".join( (%s) )' % ", ".join(pieces)
    return compile(source, '<interpolated string>', 'eval')
def _comp_dollars1(s):
    """Compile a format string in which each expression is written $expr$.

    This is _comp_backtick1 with '$' as the delimiter: the result is a
    code object that evaluates to s with every dollar-delimited
    expression replaced by its string value.
    """
    return _comp_backtick1(s, ch='$')
# Opening bracket -> the closing bracket that ends the same group.
_endchars = {'(': ')', '[': ']'}
def _isidch(ch):
    """Tell whether ch may appear in a dotted name.

    True for an underscore, a dot, or any alphanumeric character.
    """
    return ch in '_.' or ch.isalnum()
def _get_dlr_expr_at(s, i):
    """Parse the "interpolated expression" introduced by the '$' at s[i].

    Returns a tuple (expr, end): expr is Python source that evaluates
    to the text to substitute, and end is the index immediately after
    the consumed part of s.

    What may follow the '$':
      1. Another '$': yields a literal dollar sign.
      2. A parenthesized expression, e.g. $(1+2+4).
      3. A dotted name, e.g. $sys.version.
      4. Any of 2 or 3 followed by indexes, calls and attributes,
         e.g. $x[3][0] or $x.getsys().version_info[0].
      5. A string or backquoted expression; its source is returned
         as-is, without a str() wrapper.
    Forms 2-4 are returned wrapped in str(...).

    Raises ValueError("Bad format") when the '$' is the last character
    of s or is followed by anything else.  Errors from _get_btexpr_at
    for the first bracket group propagate; a later bracket group that
    cannot be parsed simply ends the expression.
    """
    start = i
    assert s[i] == '$'
    i += 1
    if i >= len(s):
        # A lone '$' at the very end has nothing to interpolate.
        raise ValueError("Bad format")
    if s[i] == '$':
        return '"$"', i+1
    #BUG: Handle $a.b, $a().b()
    # NOTE: _isidch accepts '.', so a '.' directly after a name (such as
    # a sentence-ending period) is scanned as part of the name.
    if s[i].isalnum() or s[i] == "(":
        while i < len(s) and _isidch(s[i]):
            i += 1
        if i == len(s):
            return "str(%s)" % s[start+1:], i
        ch = s[i]
        endch = _endchars.get(ch)
        if not endch or endch == ch:
            # A plain name not followed by an opening bracket.
            return "str(%s)" % s[start+1:i], i
        # Only the end index matters: the expression text is re-sliced
        # from s once the whole chain has been scanned.
        i2 = _get_btexpr_at(s, start+1, endch)[1]
        while i2 < len(s) and (s[i2] == '.' or _endchars.get(s[i2])):
            if s[i2] == '.':
                # Attribute access: swallow the dotted name.
                while i2 < len(s) and _isidch(s[i2]):
                    i2 += 1
                continue
            try:
                # Extend over the next (...) or [...] group, compiling
                # the whole chain from the start each time.
                i2 = _get_btexpr_at(s,
                                    start+1,
                                    _endchars[s[i2]],
                                    i2+1)[1]
            except Exception:
                # The bracket does not continue the expression; treat
                # it as literal text that follows.
                break
        return "str(%s)" % s[start+1:i2], i2
    elif s[i] in "\"\'`":
        i2 = _get_btexpr_at(s, i, s[i])[1]
        return s[i:i2], i2
    else:
        raise ValueError("Bad format")
def _comp_dollars2(s):
    """Compile a format string that uses perl-style $ interpolation.

    Delegates to _comp_backtick2 with '$' as the introducer and
    _get_dlr_expr_at as the chunk parser.
    """
    return _comp_backtick2(s, ch='$', fn=_get_dlr_expr_at)
# Maps each mode name accepted by I() and i() to the function that
# compiles a format string for that mode.
_modes = {}
_modes['bt1'] = _comp_backtick1
_modes['bt2'] = _comp_backtick2
_modes['d1'] = _comp_dollars1
_modes['d2'] = _comp_dollars2
class I:
    """A format string compiled once for repeated interpolation.

    pattern % namespace  evaluates the string using the given mapping.
    str(pattern)         evaluates it using the globals and locals of
                         the frame that asked for the string.
    """

    def __init__(self, s, mode='d2'):
        """Compile s using the compiler registered in _modes for mode.

        An unknown mode raises KeyError.
        """
        self.compiled = _modes[mode](s)

    def __mod__(self, vars):
        """Return the interpolated string, resolving names in vars."""
        # Evaluate in a copy: eval() adds '__builtins__' to the globals
        # dict it is given, which would otherwise leak into the caller's
        # mapping.  Copying also lets any mapping be used, not only a
        # real dict.
        scope = {}
        scope.update(vars)
        return eval(self.compiled, scope, scope)

    def __str__(self):
        """Return the interpolated string, resolving names in the caller."""
        caller = _getframe().f_back
        return eval(self.compiled, caller.f_globals, caller.f_locals)
def i(s, mode="d2"):
    """Interpolate s immediately, using the caller's variables.

    s    -- the format string.
    mode -- one of the keys of _modes.

    Names are looked up in the caller's local variables first and then
    in the caller's global variables, so that module-level names (and
    imported modules) are visible when i() is called inside a function.
    """
    caller = _getframe().f_back
    # Merge into a fresh dict so that neither of the caller's
    # namespaces is modified by the evaluation.
    scope = caller.f_globals.copy()
    scope.update(caller.f_locals)
    return I(s, mode) % scope
if __name__ == '__main__':
    # Self-test: run this file as a script to exercise each mode.
    assert i("1+1=`1+1`", 'bt1') == '1+1=2'
    if sys.version_info[0] < 3:
        # 'bt2' finds the end of a chunk by compiling it together with
        # its backticks, and backtick-repr is Python 2 only syntax.
        assert i("1+1=`'1'+`1``", 'bt2') == '1+1=11'
    x = 2
    assert i("$x$+1=$x+1$", 'd1') == '2+1=3'
    assert i("$x+1=$(x+1)", 'd2') == '2+1=3'
    assert i("1+1=$x") == '1+1=2'
    xyz = 9
    assert i("3*3=$xyz") == '3*3=9'
    assert i("3*3+2=$xyz+2") == '3*3+2=9+2'
    assert i("3*3+2=$(xyz+2)") == '3*3+2=11'
    assert i("3*3+2=$(11)") == '3*3+2=11'
    assert i("2**10=$(2**10)") == '2**10=1024'
    x = 3
    assert i("$x**2=$(x**2)") == '3**2=9'
    x = [1, 2, 3, [4, 5]]
    assert i("x[1]=$x[1]") == 'x[1]=2'
    assert i("x[3][0]=$x[3][0]") == 'x[3][0]=4'

    def power(e):
        def f(n, e=e):
            return n**e
        return f
    assert i("2**10=$power(10)(2)") == '2**10=1024'

    class X:
        pass
    x = X()
    x.y = 3
    x.z = lambda q: q*q

    def getsys():
        return sys
    x.getsys = getsys
    assert i("=> $x.y") == '=> 3'
    assert i("=> $x.z(10)") == '=> 100'
    # Compare against the running interpreter rather than a fixed "2".
    assert i("=> $x.getsys().version_info[0]") == '=> %d' % sys.version_info[0]
    # Parenthesized so the line is valid as a statement and as a call.
    print("OK")
============================================================
--
Nick Mathewson <Q nick Q m at alum dot mit dot edu>
Remove Q's to respond. No spam.
More information about the Python-list
mailing list