Toy interpolation code [was Re: Draft PEP: string interpolation with backquotes]

Nick Mathewson QnickQm at alum.mit.edu
Wed Dec 5 23:59:05 EST 2001


One thing I don't understand from the Interpolation Wars is why people
feel Python needs any new syntax for this.  Here's some example code
that shows how to implement four kinds of proposed interpolation.

They are:
   "naive backticks"  (i.e., "`a`" == str(a); 
                       "``a`+"b"`" is an error)
   "smart backticks"  (i.e., "`a`" == str(a); 
                       "``a`+"b"`" == str(repr(a)+"b"))
   "dollar-quoting"   (i.e., "a$b$c" == "a"+str(b)+"c")
   "perl-style"       (i.e., "1+1=$(1+1)" == "1+1=2",
                             "Major $sys.version_info[0]" == "Major 2",
                             x=5; "x=$x" == "x=5".)

The interface works in two ways:
   1. Immediate
        i(string, mode='d2')
                        (for mode in ('bt1','bt2','d1','d2'))
   2. Precompiled
        pattern = I(string, mode='d2')
        print str(pattern)
        print pattern % namespace

BUGS:
   1. This code is not tested well enough.
   2. It is not optimized either.
   3. It is not commented well enough. 
   4. It doesn't fail gracefully when it sees bad syntax.

============================================================
#!/usr/bin/python

import sys

# Needs version >= 2.
assert sys.version_info[0] >= 2

# Use the interpreter's own frame accessor when it exists; otherwise
# emulate the no-argument form of it.
if hasattr(sys, "_getframe"):
    _getframe = sys._getframe
else:
    def _getframe():
        """Return the frame of the caller, like sys._getframe() with no
           argument.

           An exception is raised and caught only to obtain a traceback
           object: its tb_frame is this function's own frame, so
           tb_frame.f_back is the frame that called us.
           """
        try:
            raise ZeroDivisionError
        except ZeroDivisionError:
            # sys.exc_info() is per-thread; the old sys.exc_traceback
            # global is deprecated and can be clobbered by other threads.
            return sys.exc_info()[2].tb_frame.f_back


def _comp_backtick1(s, ch='`'):
    """Given a format string, returns a code object that evaluates to
       the format string, with all values between backquotes evaluated
       and replaced by their string values.

       The string is simply cut at every occurrence of ch: text outside
       a pair of delimiters is kept literally, text between a pair is
       taken as a Python expression and wrapped in str().  The
       expressions are not parsed, so they cannot themselves contain ch.

       (The argument ch may be used to provide a different backtick value.)

       Raises ValueError("Bad format") if a delimiter is left unclosed.
       compile() raises SyntaxError if an expression is malformed.
       """

    pieces = s.split(ch)
    # N delimiters give N+1 pieces, so an even piece count means one
    # delimiter was never closed.
    if len(pieces) % 2 == 0:
        raise ValueError("Bad format")

    exp = []
    in_expr = 0
    for piece in pieces:
        if in_expr:
            exp.append("str((%s))" % piece)
        elif piece:
            # Empty literal runs contribute nothing; skip them.
            exp.append(repr(piece))
        in_expr = not in_expr

    if not exp:
        # Only the empty format string gets here.
        exp.append(repr(""))

    if len(exp) == 1:
        source = exp[0]
    else:
        source = '"".join( (%s) )' % ", ".join(exp)

    return compile(source, '<interpolated string>', 'eval')

def _get_btexpr_at(s,i,ch='`', min=None):
    """Returns (expr, end), where end is the index of s immediately
       following the smallest prefix of s[i:] that ends in ch, reaches at
       least index min (default i+1), and compiles into a syntactically
       well-formed python expression.  expr is that prefix with its first
       and last characters removed, i.e. s[i+1:end-1].

       Raises ValueError("Bad format") if no further ch can be found, and
       ValueError("Bad Expression") if the search runs off the end of s
       without any candidate compiling.
       """

    if min is None:
        bt = i+1
    else:
        bt = min
    while bt < len(s):
        bt = s.find(ch, bt)
        if bt < 0:
            raise ValueError("Bad format")
        try:
            compile(s[i:bt+1], '', 'eval')
        except SyntaxError:
            # Not a complete expression yet; try the next occurrence of ch.
            bt = bt + 1
        else:
            return s[i+1:bt], bt+1

    raise ValueError("Bad Expression")

def _comp_backtick2(s, ch='`', fn=_get_btexpr_at):
    """Given a format string, returns a code object that evaluates to
       the format string, with all backquoted chunks evaluated
       and replaced by their string values.

       (This differs from _comp_backtick1 in that _comp_backtick1 always
       looks for the next backtick, whereas _comp_backtick2 understands
       Python expressions.  IOW,
             _comp_backtick1(' `"1"+`2`` ') cuts at every backtick and
                 ends up with the broken pieces '"1"+' and '', whereas
             _comp_backtick2(' `"1"+`2`` ') will turn more sensibly into
                 compile('''"".join((' ', str(("1"+`2`)), ' '))'''). )

       Arguments:
          s  -- the format string.
          ch -- the character that introduces an interpolated chunk.
          fn -- called as fn(s, index_of_ch); must return (expr, end),
                where expr is Python source for the chunk and end is the
                index at which literal text resumes.

       Every chunk is wrapped in str() here, so chunks whose value is not
       already a string (e.g. `1+1`) can be joined with the literal text.
       """

    exp = []
    i = 0
    while 1:
        bt = s.find(ch, i)
        if bt < 0:
            # No more chunks: the rest of the string is literal.
            exp.append(repr(s[i:]))
            break
        if bt > i:
            exp.append(repr(s[i:bt]))

        btexpr, i = fn(s, bt)
        exp.append("str((%s))" % btexpr)

        if i >= len(s):
            break

    if len(exp) == 1:
        source = exp[0]
    else:
        source = '"".join( (%s) )' % ", ".join(exp)

    return compile(source, '<interpolated string>', 'eval')
    
def _comp_dollars1(s):
    """Compile a "dollar-quoted" format string: every chunk enclosed in a
       pair of '$' characters is evaluated and replaced by its string
       value.  This is _comp_backtick1 with '$' as the delimiter.
       """
    return _comp_backtick1(s, ch='$')

# Opening bracket -> the closing bracket that ends it.
_endchars = {'(': ')', '[': ']'}

def _isidch(ch):
    """True if ch may appear inside a dotted name: an alphanumeric
       character, an underscore, or a dot."""
    alnum = ch.isalnum()
    if alnum:
        return alnum
    return ch in '_.'

def _get_dlr_expr_at(s, i):
    """Given a string s and an index i with s[i] == '$', returns a tuple
       (expr, end) such that expr is Python source for the longest
       possible "interpolated expression" beginning at s[i], and end is
       the index immediately after it.

       An interpolated expression is one of:
          1.  "$$", standing for a literal dollar  $$
          2.  A parenthesized expression           $(1+2+4)
          3.  A dotted name                        $sys.version
          4.  2 or 3 followed by any run of indexes, calls and
              attributes                           $x.f(1)[0].y
          5.  A quoted or backquoted string        $"text"

       Forms 2-4 are returned wrapped in str(...); form 5 is returned
       verbatim, delimiters included.  A dot that is not followed by a
       name is left in the surrounding text, so "$x." is x followed by a
       literal period.

       Raises ValueError("Bad format") when '$' is the last character of
       s or is followed by a character that cannot start an expression.
       Errors from the initial _get_btexpr_at call propagate unchanged.
        """

    start = i
    assert s[i] == '$'
    i += 1
    if i >= len(s):
        # A lone '$' at the very end has nothing to interpolate.
        raise ValueError("Bad format")
    if s[i] == '$':
        return '"$"', i+1

    if s[i].isalnum() or s[i] == "(":
        while i < len(s) and _isidch(s[i]):
            i += 1
        # _isidch accepts '.', so a sentence-ending dot gets swallowed;
        # give it back.  s[start+1] is never '.', so this stops in time.
        while s[i-1] == '.':
            i -= 1
        if i == len(s):
            return "str(%s)" % s[start+1:], i
        ch = s[i]
        endch = _endchars.get(ch)
        if not endch or endch == ch:
            return "str(%s)" % s[start+1:i], i

        # The name is followed by '(' or '[': extend to the shortest
        # text ending in the matching bracket that actually compiles.
        i2 = _get_btexpr_at(s, start+1, endch)[1]
        while i2 < len(s) and (s[i2] == '.' or _endchars.get(s[i2])):
            if s[i2] == '.':
                j = i2
                while j < len(s) and _isidch(s[j]):
                    j += 1
                while j > i2 and s[j-1] == '.':
                    j -= 1
                if j == i2:
                    # Only dots, no attribute name: the expression is over.
                    break
                i2 = j
                continue
            try:
                i2 = _get_btexpr_at(s,
                                    start+1,
                                    _endchars[s[i2]],
                                    i2+1)[1]
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # Deliberately broad: the helper may signal failure with
                # a string exception.  Keep what has been parsed so far.
                break

        return "str(%s)" % s[start+1:i2], i2
    elif s[i] in "\"\'`":
        i2 = _get_btexpr_at(s, i, s[i])[1]
        return s[i:i2], i2
    else:
        raise ValueError("Bad format")

def _comp_dollars2(s):
    """Compile a "perl-style" format string: each '$' introduces an
       expression whose extent is worked out by _get_dlr_expr_at.
       """
    return _comp_backtick2(s, ch='$', fn=_get_dlr_expr_at)


# Dispatch table: quoting-style name -> compiler for that style.
_modes = {
    'bt1': _comp_backtick1,    # naive backticks
    'bt2': _comp_backtick2,    # smart backticks
    'd1': _comp_dollars1,      # dollar-quoting
    'd2': _comp_dollars2,      # perl-style
}

class I:
    """A precompiled interpolation pattern.

       I(s, mode) compiles the format string s once, using the quoting
       style named by mode (a key of _modes; an unknown name raises
       KeyError).  The pattern can then be evaluated repeatedly:

          pattern % namespace   -- names are looked up in namespace
          str(pattern)          -- names are looked up in the globals and
                                   locals of the code calling str()
       """

    def __init__(self, s, mode='d2'):
        cfn = _modes[mode]
        # Code object (mode 'eval') that yields the interpolated string.
        self.compiled = cfn(s)

    def __mod__(self, vars):
        """Evaluate the pattern with names taken from the mapping vars."""
        # eval() stores a '__builtins__' entry in its globals dict when
        # one is missing.  Hand it a scratch copy so the caller's mapping
        # is left untouched; vars itself still serves as the locals.
        scope = {}
        scope.update(vars)
        return eval(self.compiled, scope, vars)

    def __str__(self):
        """Evaluate the pattern in the namespace of whoever called us."""
        f = _getframe().f_back
        return eval(self.compiled, f.f_globals, f.f_locals)
	
def i(s,mode="d2"):
    """Interpolate the format string s immediately and return the result.

       mode names the quoting style ('bt1', 'bt2', 'd1' or 'd2').
       Expressions are evaluated with the calling code's local and global
       variables, so "$sys.version" works inside a function even though
       sys is a module-level name there.
       """
    caller = _getframe().f_back
    return eval(I(s, mode).compiled, caller.f_globals, caller.f_locals)

# Self-test: run this file as a script to exercise every quoting style.
if __name__ == '__main__':
    assert i("1+1=`1+1`", 'bt1') == '1+1=2'
    assert i("1+1=`'1'+`1``", 'bt2') == '1+1=11'
    x = 2
    assert i("$x$+1=$x+1$", 'd1') == '2+1=3'
    assert i("$x+1=$(x+1)", 'd2') == '2+1=3'
    assert i("1+1=$x") == '1+1=2'
    # "$$" is the escape for a literal dollar sign.
    assert i("100$$") == '100$'
    xyz = 9
    assert i("3*3=$xyz") == '3*3=9'
    # A bare name stops at the first non-identifier character.
    assert i("3*3+2=$xyz+2") == '3*3+2=9+2'
    assert i("3*3+2=$(xyz+2)") == '3*3+2=11'
    assert i("3*3+2=$(11)") == '3*3+2=11'
    assert i("2**10=$(2**10)") == '2**10=1024'
    x = 3
    assert i("$x**2=$(x**2)") == '3**2=9'
    x = [1, 2, 3, [4, 5]]
    assert i("x[1]=$x[1]") == 'x[1]=2'
    assert i("x[3][0]=$x[3][0]") == 'x[3][0]=4'

    def power(e):
        def f(n, e=e):
            return n**e
        return f

    assert i("2**10=$power(10)(2)") == '2**10=1024'

    class X:
        pass

    x = X()
    x.y = 3
    x.z = lambda q: q*q

    def getsys():
        return sys
    x.getsys = getsys

    assert i("=> $x.y") == '=> 3'
    assert i("=> $x.z(10)") == '=> 100'
    # Compare against the running interpreter rather than a fixed '2'.
    assert i("=> $x.getsys().version_info[0]") == '=> %d' % sys.version_info[0]

    print("OK")

============================================================


-- 
 Nick Mathewson    <Q nick Q m at alum dot mit dot edu>
                      Remove Q's to respond.  No spam.



More information about the Python-list mailing list