[Python-Dev] Grammar help.

Thomas Wouters thomas@xs4all.net
Fri, 7 Jul 2000 14:08:56 +0200


--OwLcNYc0lM97+oe1
Content-Type: text/plain; charset=us-ascii

On Thu, Jul 06, 2000 at 06:53:51PM -0500, Guido van Rossum wrote:

> Greg's got it right.  It's a recursive descent a.k.a. LL(1) parser.
> The solution?  Try to rewrite the grammar to avoid the ambiguities.

> So we have [testlist | rangemaker] for listmaker, where testlist is
> test(','test)* and rangemaker is [test]':'[test].  (Note that you'll
> have to add [':'[test]] to support [lo:hi:step], but tht's not the
> problem here.)

> listmaker: [rangetail | test((','test)* | rangetail)]
> rangetail: ':' [test] [':' [test]]

Right, that's what I realized, too, after Greg told me what the problem was :)
I basically did this, though the first 'test' in rangetail isn't optional,
and I don't think it would make sense to have the 2nd test optional relative
to the colon. But if you insist, it's a 2-character change ;)

> The disadvantage is that you have to change the code generator to work
> withthis mess -- that's why people like LALR(1) and other parsers.
> But LL(1) was all I could create myself (Python uses a homegrown
> parser generator).

Actually, it turned out not to be that much of a mess. There's one 'if'
statement that might look a bit strange, but the rest is pretty
straightforward. I've attached the patch, it works ;) but it isn't finished
yet: I'll try to get rid of the code duplication, add some comments and
submit one relative to the list comprehension patch, so that when that one
goes in, this one can, too ;)

Don't forget to re-make Grammar if you test this patch out.

-- 
Thomas Wouters <thomas@xs4all.net>

Hi! I'm a .signature virus! copy me into your .signature file to help me spread!

--OwLcNYc0lM97+oe1
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="rangemaker.patch"

diff -crN --exclude=CVS src/Grammar/Grammar src-rangemaker/Grammar/Grammar
*** src/Grammar/Grammar	Tue Mar 28 18:49:00 2000
--- src-rangemaker/Grammar/Grammar	Fri Jul  7 10:32:19 2000
***************
*** 74,80 ****
  term: factor (('*'|'/'|'%') factor)*
  factor: ('+'|'-'|'~') factor | power
  power: atom trailer* ('**' factor)*
! atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
  lambdef: 'lambda' [varargslist] ':' test
  trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
  subscriptlist: subscript (',' subscript)* [',']
--- 74,80 ----
  term: factor (('*'|'/'|'%') factor)*
  factor: ('+'|'-'|'~') factor | power
  power: atom trailer* ('**' factor)*
! atom: '(' [testlist] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
  lambdef: 'lambda' [varargslist] ':' test
  trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
  subscriptlist: subscript (',' subscript)* [',']
diff -crN --exclude=CVS src/Include/opcode.h src-rangemaker/Include/opcode.h
*** src/Include/opcode.h	Fri Jun 30 19:58:04 2000
--- src-rangemaker/Include/opcode.h	Fri Jul  7 13:05:13 2000
***************
*** 67,73 ****
  #define RETURN_VALUE	83
  
  #define EXEC_STMT	85
! 
  #define POP_BLOCK	87
  #define END_FINALLY	88
  #define BUILD_CLASS	89
--- 67,73 ----
  #define RETURN_VALUE	83
  
  #define EXEC_STMT	85
! #define BUILD_RANGE	86
  #define POP_BLOCK	87
  #define END_FINALLY	88
  #define BUILD_CLASS	89
diff -crN --exclude=CVS src/Python/ceval.c src-rangemaker/Python/ceval.c
*** src/Python/ceval.c	Fri Jun 30 19:58:05 2000
--- src-rangemaker/Python/ceval.c	Fri Jul  7 13:05:40 2000
***************
*** 65,70 ****
--- 65,71 ----
  static PyObject *cmp_outcome Py_PROTO((int, PyObject *, PyObject *));
  static int import_from Py_PROTO((PyObject *, PyObject *, PyObject *));
  static PyObject *build_class Py_PROTO((PyObject *, PyObject *, PyObject *));
+ static PyObject *build_range Py_PROTO((PyObject *, PyObject *, PyObject *));
  static int exec_statement Py_PROTO((PyFrameObject *,
  				    PyObject *, PyObject *, PyObject *));
  static PyObject *find_from_args Py_PROTO((PyFrameObject *, int));
***************
*** 1110,1115 ****
--- 1111,1127 ----
  			Py_DECREF(w);
  			break;
  		
+ 		case BUILD_RANGE:
+ 			w = POP();
+ 			v = POP();
+ 			u = POP();
+ 			x = build_range(u, v, w);
+ 			PUSH(x);
+ 			Py_DECREF(u);
+ 			Py_DECREF(v);
+ 			Py_DECREF(w);
+ 			break;
+ 		
  		case POP_BLOCK:
  			{
  				PyTryBlock *b = PyFrame_BlockPop(f);
***************
*** 2801,2806 ****
--- 2813,2897 ----
  		}
  	}
  	return PyClass_New(bases, methods, name);
+ }
+ 
+ /* Stolen from bltinmodule.c */
+ static long
+ get_len_of_range(lo, hi, step)
+ 	long lo;
+ 	long hi;
+ 	long step;	/* must be > 0 */
+ {
+ 	/* -------------------------------------------------------------
+ 	If lo >= hi, the range is empty.
+ 	Else if n values are in the range, the last one is
+ 	lo + (n-1)*step, which must be <= hi-1.  Rearranging,
+ 	n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives
+ 	the proper value.  Since lo < hi in this case, hi-lo-1 >= 0, so
+ 	the RHS is non-negative and so truncation is the same as the
+ 	floor.  Letting M be the largest positive long, the worst case
+ 	for the RHS numerator is hi=M, lo=-M-1, and then
+ 	hi-lo-1 = M-(-M-1)-1 = 2*M.  Therefore unsigned long has enough
+ 	precision to compute the RHS exactly.
+ 	---------------------------------------------------------------*/
+ 	long n = 0;
+ 	if (lo < hi) {
+ 		unsigned long uhi = (unsigned long)hi;
+ 		unsigned long ulo = (unsigned long)lo;
+ 		unsigned long diff = uhi - ulo - 1;
+ 		n = (long)(diff / (unsigned long)step + 1);
+ 	}
+ 	return n;
+ }
+ 
+ static PyObject *
+ build_range(starto, endo, stepo)
+ 	PyObject *starto;
+ 	PyObject *endo;
+ 	PyObject *stepo;
+ {
+ 	long ilow = 0, ihigh = 0, istep = 1;
+ 	long bign;
+ 	int i, n;
+ 	PyObject * v;
+ 	
+ 	ilow = PyInt_AsLong(starto);
+ 	if (ilow == -1 && PyErr_Occurred())
+ 		return NULL;
+ 	ihigh = PyInt_AsLong(endo);
+ 	if (ihigh == -1 && PyErr_Occurred())
+ 		return NULL;
+ 	istep = PyInt_AsLong(stepo);
+ 	if (istep == -1 && PyErr_Occurred())
+ 		return NULL;
+ 
+ 	if (istep == 0) {
+ 		PyErr_SetString(PyExc_ValueError, "zero step for builtin range");
+ 		return NULL;
+ 	}
+ 	if (istep > 0)
+ 		bign = get_len_of_range(ilow, ihigh, istep);
+ 	else
+ 		bign = get_len_of_range(ihigh, ilow, -istep);
+ 	n = (int)bign;
+ 	if (bign < 0 || (long)n != bign) {
+ 		PyErr_SetString(PyExc_OverflowError,
+ 				"too many items in builin range");
+ 		return NULL;
+ 	}
+ 	v = PyList_New(n);
+ 	if (v == NULL)
+ 		return NULL;
+ 	for (i = 0; i < n; i++) {
+ 		PyObject *w = PyInt_FromLong(ilow);
+ 		if (w == NULL) {
+ 			Py_DECREF(v);
+ 			return NULL;
+ 		}
+ 		PyList_SET_ITEM(v, i, w);
+ 		ilow += istep;
+ 	}
+ 	return v;
  }
  
  static int
diff -crN --exclude=CVS src/Python/compile.c src-rangemaker/Python/compile.c
*** src/Python/compile.c	Mon Jul  3 17:39:47 2000
--- src-rangemaker/Python/compile.c	Fri Jul  7 13:05:41 2000
***************
*** 1018,1026 ****
  {
  	int len;
  	int i;
! 	if (TYPE(n) != testlist)
! 		REQ(n, exprlist);
! 	/* exprlist: expr (',' expr)* [',']; likewise for testlist */
  	len = (NCH(n) + 1) / 2;
  	for (i = 0; i < NCH(n); i += 2)
  		com_node(c, CHILD(n, i));
--- 1018,1025 ----
  {
  	int len;
  	int i;
! 	REQ(n, listmaker);
! 	/* listmaker: test (',' test)* [',']; rangetail ommited */
  	len = (NCH(n) + 1) / 2;
  	for (i = 0; i < NCH(n); i += 2)
  		com_node(c, CHILD(n, i));
***************
*** 1029,1034 ****
--- 1028,1081 ----
  }
  
  static void
+ com_rangetail(c, n)
+ 	struct compiling *c;
+ 	node *n;
+ {
+ 	REQ(n, rangetail);
+ 	com_node(c, CHILD(n, 1));
+ 	if (NCH(n) > 3) {
+ 		com_node(c, CHILD(n, 3));
+ 	} else {
+ 		PyObject *v = PyInt_FromLong(1L);
+ 		if (!v)
+ 			c->c_errors++;
+ 		com_addoparg(c, LOAD_CONST, com_addconst(c, v));
+ 		com_push(c, 1);
+ 		Py_XDECREF(v);
+ 	}
+ 	com_addbyte(c, BUILD_RANGE);
+ 	com_pop(c, 3);
+ }
+ 
+ static void
+ com_listmaker(c, n)
+ 	struct compiling *c;
+ 	node *n;
+ {
+ 	if (TYPE(CHILD(n, 0)) != rangetail && (NCH(n) == 1 ||
+ 				TYPE(CHILD(n, 1)) != rangetail)) {
+ 		/* '[' test (',' test)* [','] ']' */
+ 		com_list_constructor(c, n);
+ 		return;
+ 	}
+ 	if (NCH(n) == 1) {
+ 		/* '[' rangetail ']' */
+ 		PyObject *v = PyInt_FromLong(0L);
+ 		if (!v)
+ 			c->c_errors++;
+ 		com_addoparg(c, LOAD_CONST, com_addconst(c, v));
+ 		com_push(c, 1);
+ 		Py_XDECREF(v);
+ 		com_rangetail(c, CHILD(n, 0));
+ 	} else {
+ 		com_node(c, CHILD(n, 0));
+ 		com_rangetail(c, CHILD(n, 1));
+ 	}
+ }	
+ 	
+ 
+ static void
  com_dictmaker(c, n)
  	struct compiling *c;
  	node *n;
***************
*** 1073,1079 ****
  			com_push(c, 1);
  		}
  		else
! 			com_list_constructor(c, CHILD(n, 1));
  		break;
  	case LBRACE: /* '{' [dictmaker] '}' */
  		com_addoparg(c, BUILD_MAP, 0);
--- 1120,1126 ----
  			com_push(c, 1);
  		}
  		else
! 			com_listmaker(c, CHILD(n, 1));
  		break;
  	case LBRACE: /* '{' [dictmaker] '}' */
  		com_addoparg(c, BUILD_MAP, 0);

--OwLcNYc0lM97+oe1--