[pypy-svn] r51871 - in pypy/dist/pypy/rlib/parsing: . test

jared.grubb at codespeak.net jared.grubb at codespeak.net
Tue Feb 26 10:56:31 CET 2008


Author: jared.grubb
Date: Tue Feb 26 10:56:29 2008
New Revision: 51871

Added:
   pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py   (contents, props changed)
Modified:
   pypy/dist/pypy/rlib/parsing/deterministic.py
   pypy/dist/pypy/rlib/parsing/regexparse.py
   pypy/dist/pypy/rlib/parsing/test/test_deterministic.py
   pypy/dist/pypy/rlib/parsing/test/test_regexparse.py
Log:
parsing/regex stuff: add support for {n,} '\cx' '\377'; also working on adapter class to let us run some PCRE regression tests (what better way to test our NFA's and DFA's than some vigorous RE's :)
parsing/deterministic: make_nice_charset_repr now escapes the ] as well


Modified: pypy/dist/pypy/rlib/parsing/deterministic.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/deterministic.py	(original)
+++ pypy/dist/pypy/rlib/parsing/deterministic.py	Tue Feb 26 10:56:29 2008
@@ -27,13 +27,14 @@
     
     # Change the above list into a list of sorted tuples
     real_result = [(c,l) for [c,l] in result]
+    # Sort longer runs first (hence -c), then alphabetically
     real_result.sort(key=lambda (l,c): (-c,l))
     return real_result
 
 def make_nice_charset_repr(chars):
     # Compress the letters & digits
     letters = set(chars) & set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
-    therest = set(chars) - letters - set('-')
+    therest = set(chars) - letters - set(['-',']'])
     charranges = compress_char_set(letters)
     result = []
     for a, num in charranges:
@@ -45,8 +46,11 @@
         else:
             result.append("%s-%s" % (repr(a)[1:-1], repr(chr(ord(a) + num - 1))[1:-1]))
     result += [repr(c)[1:-1] for c in therest]
+    # Handle the special chars that MUST get escaped
     if '-' in chars:
         result += ['\\-']
+    if ']' in chars:
+        result += ['\\]']
     return "".join(result)
 
 class LexerError(Exception):
@@ -214,6 +218,9 @@
         result.emit("i = 0")
         result.emit("state = 0")
         result.start_block("while 1:")
+        
+        # state_to_chars maps state -> {nextstate: set of chars causing that transition}
+        #   Ex: state_to_chars = { 0: { 1: set(['a','b','c']), ...}, ...}
         state_to_chars = {}
         for (state, char), nextstate in self.transitions.iteritems():
             state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char)

Modified: pypy/dist/pypy/rlib/parsing/regexparse.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/regexparse.py	(original)
+++ pypy/dist/pypy/rlib/parsing/regexparse.py	Tue Feb 26 10:56:29 2008
@@ -20,23 +20,38 @@
 }
 
 for i in range(256):
-    # 'x' and numbers are reserved for hexadecimal/octal escapes
-    if chr(i) in 'x01234567':
-        continue
-    escaped = "\\" + chr(i)
+    if chr(i) not in 'x01234567':
+        # 'x' and numbers are reserved for hexadecimal/octal escapes
+        escaped = "\\" + chr(i)
+        if escaped not in ESCAPES:
+            ESCAPES[escaped] = chr(i)
+
+    # Three digit octals
+    escaped = "\\%03o" % i
     if escaped not in ESCAPES:
         ESCAPES[escaped] = chr(i)
+
+    if 0 <= i <= 077:
+        # Two-digit octal escapes are ok too
+        escaped = "\\%02o" % i
+        if escaped not in ESCAPES:
+            ESCAPES[escaped] = chr(i)
+    
+    # Add the ctrl-x types:
+    #   Rule, according to PCRE:
+    #     if x is a lower case letter, it is converted to upper case. 
+    #     Then bit 6 of the character (hex 40) is inverted.   
+    #     Thus, \cz => 0x1A, but \c{ => 0x3B, while \c; => 0x7B.
+    escaped = "\\c%s" % chr(i)
+    if escaped not in ESCAPES:
+        ESCAPES[escaped] = chr(ord(chr(i).upper()) ^ 0x40)
+    
+
 for a in "0123456789ABCDEFabcdef":
     for b in "0123456789ABCDEFabcdef":
         escaped = "\\x%s%s" % (a, b)
         if escaped not in ESCAPES:
             ESCAPES[escaped] = chr(int("%s%s" % (a, b), 16))
-for a in "0123":
-    for b in "01234567":
-        for c in "01234567":
-            escaped = "\\x%s%s%s" % (a, b, c)
-            if escaped not in ESCAPES:
-                ESCAPES[escaped] = chr(int("%s%s%s" % (a, b, c), 8))
 
 def unescape(s):
     result = []
@@ -90,11 +105,16 @@
   | r1 = primary
     '?'
     return {regex.StringExpression("") | r1}
-  | r = primary
+  | r1 = primary
+    '{'
+    n = clippednumrange
+    '}'
+    return {r1 * n + r1.kleene()}
+  | r1 = primary
     '{'
     n = numrange
     '}'
-    return {r * n[0] + reduce(operator.or_, [r * i for i in range(n[1] - n[0] + 1)], regex.StringExpression(""))}
+    return {r1 * n[0] + reduce(operator.or_, [r1 * i for i in range(n[1] - n[0] + 1)], regex.StringExpression(""))}
   | primary;
 
 primary:
@@ -112,7 +132,7 @@
     return {c};
 
 QUOTEDCHAR:
-    `(\\x[0-9a-fA-F]{2})|(\\.)`;
+    `(\\x[0-9a-fA-F]{2})|(\\[0-3]?[0-7][0-7])|(\\c.)|(\\.)`;
 
 CHAR:
     `[^\*\+\(\)\[\]\{\}\|\.\-\?\,\^]`;
@@ -149,6 +169,11 @@
   | n1 = NUM
     return {n1, n1};
 
+clippednumrange:
+    n1 = NUM
+    ','
+    return {n1};
+
 NUM:
     c = `0|([1-9][0-9]*)`
     return {int(c)};
@@ -537,14 +562,14 @@
                     _call_status = self._primary()
                     _result = _call_status.result
                     _error = self._combine_errors(_error, _call_status.error)
-                    r = _result
+                    r1 = _result
                     _result = self.__chars__('{')
-                    _call_status = self._numrange()
+                    _call_status = self._clippednumrange()
                     _result = _call_status.result
                     _error = self._combine_errors(_error, _call_status.error)
                     n = _result
                     _result = self.__chars__('}')
-                    _result = (r * n[0] + reduce(operator.or_, [r * i for i in range(n[1] - n[0] + 1)], regex.StringExpression("")))
+                    _result = (r1 * n + r1.kleene())
                     break
                 except BacktrackException, _exc:
                     _error = self._combine_errors(_error, _exc.error)
@@ -554,10 +579,27 @@
                     _call_status = self._primary()
                     _result = _call_status.result
                     _error = self._combine_errors(_error, _call_status.error)
+                    r1 = _result
+                    _result = self.__chars__('{')
+                    _call_status = self._numrange()
+                    _result = _call_status.result
+                    _error = self._combine_errors(_error, _call_status.error)
+                    n = _result
+                    _result = self.__chars__('}')
+                    _result = (r1 * n[0] + reduce(operator.or_, [r1 * i for i in range(n[1] - n[0] + 1)], regex.StringExpression("")))
                     break
                 except BacktrackException, _exc:
                     _error = self._combine_errors(_error, _exc.error)
                     self._pos = _choice4
+                _choice5 = self._pos
+                try:
+                    _call_status = self._primary()
+                    _result = _call_status.result
+                    _error = self._combine_errors(_error, _call_status.error)
+                    break
+                except BacktrackException, _exc:
+                    _error = self._combine_errors(_error, _exc.error)
+                    self._pos = _choice5
                     raise BacktrackException(_error)
                 _call_status = self._primary()
                 _result = _call_status.result
@@ -787,7 +829,7 @@
         try:
             _result = None
             _error = None
-            _result = self._regex1380912319()
+            _result = self._regex1192240515()
             assert _status.status != _status.LEFTRECURSION
             _status.status = _status.NORMAL
             _status.pos = self._pos
@@ -1216,6 +1258,64 @@
             _status.error = _error
             _status.status = _status.ERROR
             raise BacktrackException(_error)
+    def clippednumrange(self):
+        return self._clippednumrange().result
+    def _clippednumrange(self):
+        _key = self._pos
+        _status = self._dict_clippednumrange.get(_key, None)
+        if _status is None:
+            _status = self._dict_clippednumrange[_key] = Status()
+        else:
+            _statusstatus = _status.status
+            if _statusstatus == _status.NORMAL:
+                self._pos = _status.pos
+                return _status
+            elif _statusstatus == _status.ERROR:
+                raise BacktrackException(_status.error)
+            elif (_statusstatus == _status.INPROGRESS or
+                  _statusstatus == _status.LEFTRECURSION):
+                _status.status = _status.LEFTRECURSION
+                if _status.result is not None:
+                    self._pos = _status.pos
+                    return _status
+                else:
+                    raise BacktrackException(None)
+            elif _statusstatus == _status.SOMESOLUTIONS:
+                _status.status = _status.INPROGRESS
+        _startingpos = self._pos
+        try:
+            _result = None
+            _error = None
+            _call_status = self._NUM()
+            _result = _call_status.result
+            _error = _call_status.error
+            n1 = _result
+            _result = self.__chars__(',')
+            _result = (n1)
+            if _status.status == _status.LEFTRECURSION:
+                if _status.result is not None:
+                    if _status.pos >= self._pos:
+                        _status.status = _status.NORMAL
+                        self._pos = _status.pos
+                        return _status
+                _status.pos = self._pos
+                _status.status = _status.SOMESOLUTIONS
+                _status.result = _result
+                _status.error = _error
+                self._pos = _startingpos
+                return self._clippednumrange()
+            _status.status = _status.NORMAL
+            _status.pos = self._pos
+            _status.result = _result
+            _status.error = _error
+            return _status
+        except BacktrackException, _exc:
+            _status.pos = -1
+            _status.result = None
+            _error = self._combine_errors(_error, _exc.error)
+            _status.error = _error
+            _status.status = _status.ERROR
+            raise BacktrackException(_error)
     def NUM(self):
         return self._NUM().result
     def _NUM(self):
@@ -1265,6 +1365,7 @@
         self._dict_subrange = {}
         self._dict_rangeelement = {}
         self._dict_numrange = {}
+        self._dict_clippednumrange = {}
         self._dict_NUM = {}
         self._pos = 0
         self._inputstream = inputstream
@@ -1282,10 +1383,10 @@
         _result = self._inputstream[_pos: _upto]
         self._pos = _upto
         return _result
-    def _regex1323868075(self):
+    def _regex1192240515(self):
         _choice1 = self._pos
         _runner = self._Runner(self._inputstream, self._pos)
-        _i = _runner.recognize_1323868075(self._pos)
+        _i = _runner.recognize_1192240515(self._pos)
         if _runner.last_matched_state == -1:
             self._pos = _choice1
             raise BacktrackException
@@ -1296,10 +1397,10 @@
         _result = self._inputstream[_pos: _upto]
         self._pos = _upto
         return _result
-    def _regex1380912319(self):
+    def _regex1323868075(self):
         _choice2 = self._pos
         _runner = self._Runner(self._inputstream, self._pos)
-        _i = _runner.recognize_1380912319(self._pos)
+        _i = _runner.recognize_1323868075(self._pos)
         if _runner.last_matched_state == -1:
             self._pos = _choice2
             raise BacktrackException
@@ -1360,7 +1461,7 @@
                 break
             runner.state = state
             return ~i
-        def recognize_1323868075(runner, i):
+        def recognize_1192240515(runner, i):
             #auto-generated code, don't edit
             assert i >= 0
             input = runner.text
@@ -1374,95 +1475,144 @@
                         runner.state = 0
                         return ~i
                     if char == '\\':
-                        state = 1
-                    elif '/' <= char <= '>':
-                        state = 1
-                    elif '@' <= char <= 'Z':
-                        state = 1
-                    elif '_' <= char <= 'z':
-                        state = 1
-                    elif '\x00' <= char <= "'":
-                        state = 1
-                    elif '~' <= char <= '\xff':
-                        state = 1
+                        state = 6
                     else:
                         break
-                runner.last_matched_state = state
-                runner.last_matched_index = i - 1
-                runner.state = state
-                if i == len(input):
-                    return i
-                else:
-                    return ~i
-                break
-            runner.state = state
-            return ~i
-        def recognize_1380912319(runner, i):
-            #auto-generated code, don't edit
-            assert i >= 0
-            input = runner.text
-            state = 0
-            while 1:
-                if state == 0:
+                if state == 1:
+                    runner.last_matched_index = i - 1
+                    runner.last_matched_state = state
                     try:
                         char = input[i]
                         i += 1
                     except IndexError:
-                        runner.state = 0
-                        return ~i
-                    if char == '\\':
+                        runner.state = 1
+                        return i
+                    if '0' <= char <= '7':
                         state = 4
                     else:
                         break
-                if state == 1:
+                if state == 2:
+                    runner.last_matched_index = i - 1
+                    runner.last_matched_state = state
                     try:
                         char = input[i]
                         i += 1
                     except IndexError:
-                        runner.state = 1
-                        return ~i
-                    if 'A' <= char <= 'F':
-                        state = 3
+                        runner.state = 2
+                        return i
+                    if '0' <= char <= '9':
+                        state = 5
+                    elif 'A' <= char <= 'F':
+                        state = 5
                     elif 'a' <= char <= 'f':
-                        state = 3
-                    elif '0' <= char <= '9':
-                        state = 3
+                        state = 5
                     else:
                         break
-                if state == 2:
+                if state == 3:
                     runner.last_matched_index = i - 1
                     runner.last_matched_state = state
                     try:
                         char = input[i]
                         i += 1
                     except IndexError:
-                        runner.state = 2
+                        runner.state = 3
                         return i
-                    if 'A' <= char <= 'F':
-                        state = 1
-                        continue
-                    elif 'a' <= char <= 'f':
-                        state = 1
-                        continue
-                    elif '0' <= char <= '9':
-                        state = 1
-                        continue
+                    if '\x00' <= char <= '\xff':
+                        state = 7
                     else:
                         break
                 if state == 4:
+                    runner.last_matched_index = i - 1
+                    runner.last_matched_state = state
                     try:
                         char = input[i]
                         i += 1
                     except IndexError:
                         runner.state = 4
+                        return i
+                    if '0' <= char <= '7':
+                        state = 7
+                    else:
+                        break
+                if state == 5:
+                    try:
+                        char = input[i]
+                        i += 1
+                    except IndexError:
+                        runner.state = 5
+                        return ~i
+                    if '0' <= char <= '9':
+                        state = 7
+                    elif 'A' <= char <= 'F':
+                        state = 7
+                    elif 'a' <= char <= 'f':
+                        state = 7
+                    else:
+                        break
+                if state == 6:
+                    try:
+                        char = input[i]
+                        i += 1
+                    except IndexError:
+                        runner.state = 6
                         return ~i
-                    if char == 'x':
+                    if '0' <= char <= '3':
+                        state = 1
+                        continue
+                    elif char == 'x':
                         state = 2
                         continue
-                    elif '\x00' <= char <= 'w':
+                    elif char == 'c':
                         state = 3
+                        continue
+                    elif '4' <= char <= '7':
+                        state = 4
+                        continue
                     elif 'y' <= char <= '\xff':
-                        state = 3
+                        state = 7
+                    elif '\x00' <= char <= '/':
+                        state = 7
+                    elif '8' <= char <= 'b':
+                        state = 7
+                    elif 'd' <= char <= 'w':
+                        state = 7
+                    else:
+                        break
+                runner.last_matched_state = state
+                runner.last_matched_index = i - 1
+                runner.state = state
+                if i == len(input):
+                    return i
+                else:
+                    return ~i
+                break
+            runner.state = state
+            return ~i
+        def recognize_1323868075(runner, i):
+            #auto-generated code, don't edit
+            assert i >= 0
+            input = runner.text
+            state = 0
+            while 1:
+                if state == 0:
+                    try:
+                        char = input[i]
+                        i += 1
+                    except IndexError:
+                        runner.state = 0
+                        return ~i
+                    if '~' <= char <= '\xff':
+                        state = 1
+                    elif '\x00' <= char <= "'":
+                        state = 1
+                    elif '_' <= char <= 'z':
+                        state = 1
+                    elif '@' <= char <= 'Z':
+                        state = 1
+                    elif '/' <= char <= '>':
+                        state = 1
+                    elif char == '\\':
+                        state = 1
                     else:
                         break
                 runner.last_matched_state = state
@@ -1506,6 +1656,13 @@
 
 
 
+
+
+
+
+
+
+
 def test_generate():
     f = py.magic.autopath()
     oldcontent = f.read()

Modified: pypy/dist/pypy/rlib/parsing/test/test_deterministic.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_deterministic.py	(original)
+++ pypy/dist/pypy/rlib/parsing/test/test_deterministic.py	Tue Feb 26 10:56:29 2008
@@ -170,4 +170,6 @@
     assert make_nice_charset_repr("ABCabc") == 'A-Ca-c'
     assert make_nice_charset_repr("zycba") == 'a-cyz'
     assert make_nice_charset_repr(string.ascii_letters) == 'A-Za-z'
-    assert make_nice_charset_repr(string.printable) == 'A-Za-z0-9\\t\\x0b\\n\\r\\x0c! #"%$\'&)(+*,/.;:=<?>@[]\\\\_^`{}|~\\-'
+    # this next one is ugly... need to clean it up (sometimes it fails because it's
+    # being generated from a dict, so the order is funky)
+    assert make_nice_charset_repr(string.printable) == 'A-Za-z0-9\\t\\x0b\\n\\r\\x0c! #"%$\'&)(+*,/.;:=<?>@[\\\\_^`{}|~\\-\\]'
\ No newline at end of file

Added: pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/rlib/parsing/test/test_pcre_regtest.py	Tue Feb 26 10:56:29 2008
@@ -0,0 +1,172 @@
+# This file can (in progress) read and parse PCRE regression tests to try out
+# on our regular expression library.
+# 
+# To try this out, 'man pcretest' and then grab testinput1 and testoutput1 from 
+# the PCRE source code. (I need to look into whether we could distribute these
+# files with pypy?)
+
+import py
+from pypy.rlib.parsing.regexparse import make_runner, unescape, RegexParser
+import string
+import re
+
+py.test.skip("In Progress...")
+
+def get_simult_lines(tests, results, test_line_num=0):
+    """Returns a line from the input/output, ensuring that
+    we are sync'd up between the two."""
+    test = tests.pop(0)
+    result = results.pop(0)
+    
+    test_line_num += 1
+    
+    if test != result:
+        raise Exception("Lost sync between files at input line %d.\n  INPUT: %s\n  OUTPUT: %s" % (test_line_num, test, result))
+        
+    return test
+    
+def get_definition_line(tests, results):
+    """Gets a test definition line, formatted per the PCRE spec."""
+    delim = None
+    test = ''
+    result = ''
+    
+    # A line is marked by a start-delimiter and an end-delimiter.
+    # The delimiter is non-alphanumeric
+    # If a backslash follows the delimiter, then the backslash should
+    #   be appended to the end. (Otherwise, \ + delim would not be a
+    #   delim anymore!)
+    while 1:
+        test += get_simult_lines(tests, results)
+    
+        if delim is None:
+            delim = test[0]
+            assert delim in (set(string.printable) - set(string.letters) - set(string.digits))
+            test_re = re.compile(r'%(delim)s(([^%(delim)s]|\\%(delim)s)*([^\\]))%(delim)s(\\?)(.*)' % {'delim': delim})
+        
+        matches = test_re.findall(test)
+        if matches:
+            break
+
+    assert len(matches)==1
+    test = matches[0][0]
+    
+    # Add the backslash, if we gotta
+    test += matches[0][-2]
+    flags = matches[0][-1]
+
+    return test, flags
+    
+def get_test_result(tests, results):
+    """Gets the expected return from the regular expression"""
+    # Second line is the test to run against the regex
+    # '    TEXT'
+    test = get_simult_lines(tests, results)
+    if not test:
+        return None, None
+    if not test.startswith('    '):
+        raise Exception("Input & output match, but I don't understand. (Got %r)" % test)
+    test = unescape(test[4:])
+    
+    # Third line in the OUTPUT is the result, either:
+    # ' 0: ...' for a match
+    # 'No match' for no match
+    result = unescape(results.pop(0))
+    if result == 'No match':
+        pass
+    elif result.startswith(' 0: '):
+        # Now we need to eat any further lines like:
+        # ' 1: ....' a subgroup match
+        while results[0]:
+            if results[0][2] == ':':
+                results.pop(0)
+            else:
+                break
+    else:
+        raise Exception("Lost sync in output.")
+    return test, result
+    
+def test_file():
+    """Open the PCRE tests and run them."""
+    tests = [line.rstrip() for line in open('testinput1','r').readlines()]
+    results = [line.rstrip() for line in open('testoutput1','r').readlines()]
+    
+    regex_flag_mapping = { '': lambda s: s, 
+                           'i': lambda s: s.upper()
+                         }
+    
+    import pdb
+    while tests:
+        # First line is a test, in the form:
+        # '/regex expression/FLAGS'
+        regex, regex_flags = get_definition_line(tests, results)
+
+        # Handle the flags:
+        try:
+            text_prepare = regex_flag_mapping[regex_flags]
+        except KeyError:
+            print "UNKNOWN FLAGS: %s" % regex_flags
+            continue
+        
+        print '%r' % regex
+
+        skipped = any([op in regex for op in ['*?', '??', '+?', '}?']])        
+        if skipped:
+            print "  SKIPPED (cant do non-greedy operators)"
+            # now burn all the tests for this regex
+            while 1:
+                test, result = get_test_result(tests, results)
+                if not test:
+                    break   # A blank line means we have nothing to do
+            continue
+                
+        regex_to_use = text_prepare(regex)
+        
+        anchor_left = regex_to_use.startswith('^')
+        anchor_right = regex_to_use.endswith('$') and not regex_to_use.endswith('\\$')
+        if anchor_left:
+            regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
+        if anchor_right:
+            regex_to_use = regex_to_use[:-1]  # chop the $ if it's there
+        
+        # Finally, we make the pypy regex runner
+        runner = make_runner(regex_to_use)
+
+        # Now run the test expressions against the Regex
+        while 1:
+            test, result = get_test_result(tests, results)
+            if not test:
+                break   # A blank line means we have nothing to do
+                
+            # Create possible subsequences that we should test
+            if anchor_left:
+                subseq_gen = [0]
+            else:
+                subseq_gen = (start for start in range(0, len(test)))
+            
+            if anchor_right:
+                subseq_gen = ( (start, len(test)) for start in subseq_gen )
+            else:
+                # Go backwards to simulate greediness
+                subseq_gen = ( (start, end) for start in subseq_gen for end in range(len(test)+1, start+1, -1) )
+
+            # Search the possibilities for a match...
+            for start, end in subseq_gen:
+                attempt = text_prepare(test[start:end])
+                matched = runner.recognize(attempt)
+                if matched: 
+                    break
+            
+            # Did we get what we expected?
+            if result == 'No match':
+                if matched:
+                    print "  FALSE MATCH: regex==%r test==%r" % (regex, test)
+                else:
+                    print "  pass       : regex==%r test==%r" % (regex, test)
+            elif result.startswith(' 0: '):
+                if not matched:
+                    print "  MISSED:      regex==%r test==%r" % (regex, test)
+                elif not attempt==text_prepare(result[4:]):
+                    print "  BAD MATCH:   regex==%r test==%r found==%r expect==%r" % (regex, test, attempt, result[4:])
+                else:
+                    print "  pass       : regex==%r test==%r" % (regex, test)

Modified: pypy/dist/pypy/rlib/parsing/test/test_regexparse.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_regexparse.py	(original)
+++ pypy/dist/pypy/rlib/parsing/test/test_regexparse.py	Tue Feb 26 10:56:29 2008
@@ -96,7 +96,7 @@
     assert r.recognize("a" * 15)
     assert not r.recognize("a" * 14)
     assert not r.recognize("a" * 16)
-    assert not r.recognize("b" * 16)
+    assert not r.recognize("b" * 15)
     r = make_runner('a{2,10}')
     assert r.recognize("a" * 2)
     assert r.recognize("a" * 5)
@@ -105,6 +105,14 @@
     assert not r.recognize("a" + "b")
     assert not r.recognize("a" * 11)
     assert not r.recognize("a" * 12)
+    r = make_runner('a{3,}')
+    assert r.recognize("a" * 3)
+    assert r.recognize("a" * 5)
+    assert r.recognize("a" * 10)
+    assert r.recognize("a" * 12)
+    assert not r.recognize("a")
+    assert not r.recognize("a" + "b")
+    assert not r.recognize("a" * 2)
 
 def test_quotes():
     r = make_runner('"[^\\"]*"')
@@ -114,6 +122,13 @@
     r = make_runner('\\n\\x0a')
     assert not r.recognize("n\n")
     assert r.recognize("\n\n")
+    r = make_runner('\\12\\012')
+    assert r.recognize("\n\n")
+    r = make_runner('\\377\\xff')
+    assert r.recognize("\xff\xff")
+    r = make_runner('\\?')
+    assert r.recognize("?")
+    assert not r.recognize("a")
 
 def test_comment():
     r = make_runner("(/\\*[^\\*/]*\\*/)")



More information about the Pypy-commit mailing list