[py-svn] r35305 - in py/dist/py/apigen/source: . testing

guido at codespeak.net guido at codespeak.net
Tue Dec 5 16:32:03 CET 2006


Author: guido
Date: Tue Dec  5 16:32:01 2006
New Revision: 35305

Modified:
   py/dist/py/apigen/source/color.py
   py/dist/py/apigen/source/html.py
   py/dist/py/apigen/source/testing/test_color.py
   py/dist/py/apigen/source/testing/test_html.py
Log:
Fixed some small bugs in the tokenizer, added a method to HTMLDocument that
uses the tokenizer to colour lines (although not in use yet).


Modified: py/dist/py/apigen/source/color.py
==============================================================================
--- py/dist/py/apigen/source/color.py	(original)
+++ py/dist/py/apigen/source/color.py	Tue Dec  5 16:32:01 2006
@@ -33,12 +33,12 @@
 class Tokenizer(object):
     """ when fed lists strings, it will return tokens with type info
     
-        very simple tokenizer, state is recorded for multi-line strings, etc.
+        very naive tokenizer, state is recorded for multi-line strings, etc.
     """
 
     _re_word = re.compile('[\w_]+')
     _re_space = re.compile('\s+')
-    _re_number = re.compile('[\d\.]*\d+')
+    _re_number = re.compile('[\d\.]*\d[\d\.]*l?', re.I)
     _re_rest = re.compile('[^\w\s\d]+')
 
     # these will be filled using the schema
@@ -75,8 +75,8 @@
                 yield Token(s, 'string')
         while data:
             for f in [self._check_multiline_strings, self._check_full_strings,
-                      self._check_comments, self._check_word,
-                      self._check_space, self._check_number, self._check_rest]:
+                      self._check_comments, self._check_number,
+                      self._check_space, self._check_word, self._check_rest]:
                 data, t = f(data)
                 if t:
                     yield t
@@ -124,7 +124,12 @@
         m = self._re_word.match(data)
         if m:
             s = m.group(0)
-            return data[len(s):], Token(s, 'word')
+            type = 'word'
+            if s in self.schema.keyword:
+                type = 'keyword'
+            elif s in self.schema.alt_keyword:
+                type = 'alt_keyword'
+            return data[len(s):], Token(s, type)
         return data, None
 
     def _check_space(self, data):
@@ -148,3 +153,4 @@
             return data[len(s):], Token(s, 'unknown')
         return data, None
 
+

Modified: py/dist/py/apigen/source/html.py
==============================================================================
--- py/dist/py/apigen/source/html.py	(original)
+++ py/dist/py/apigen/source/html.py	Tue Dec  5 16:32:01 2006
@@ -4,6 +4,7 @@
 
 from py.xml import html, raw
 from compiler import ast
+from py.__.apigen.source.color import Tokenizer, PythonSchema
 
 class HtmlEnchanter(object):
     reserved_words = {}
@@ -36,8 +37,9 @@
             pos = row.find(item.name)
             assert pos != -1
             end = len(item.name) + pos
-            return self.colors(row[:pos]) + [html.a(row[pos:end], href="#" + item.name,
-                                      name=item.name)] + self.colors(row[end:])
+            chunk = [html.a(row[pos:end], href="#" + item.name,
+                     name=item.name)]
+            return self.colors(row[:pos]) + chunk + self.colors(row[end:])
         except KeyError:
             return self.colors(row) # no more info
 
@@ -52,7 +54,7 @@
     return output
 
 class HTMLDocument(object):
-    def __init__(self):
+    def __init__(self, tokenizer=None):
         self.html = root = html.html()
         self.head = head = self.create_head()
         root.append(head)
@@ -61,6 +63,10 @@
         self.table, self.tbody = table, tbody = self.create_table()
         body.append(table)
 
+        if tokenizer is None:
+            tokenizer = Tokenizer(PythonSchema)
+        self.tokenizer = tokenizer
+
     def create_head(self):
         return html.head(
             html.title('source view'),
@@ -102,6 +108,26 @@
         table.append(tbody)
         return table, tbody
 
+    def prepare_line(self, text):
+        """ adds html formatting to text items (list)
+
+            only processes items if they're of a string type (or unicode)
+        """
+        ret = []
+        for item in text:
+            if type(item) in [str, unicode]:
+                tokens = self.tokenizer.tokenize(item)
+                for t in tokens:
+                    print t.type
+                    if t.type in ['keyword', 'alt_keyword', 'number',
+                                  'string']:
+                        ret.append(html.span(t.data, class_=t.type))
+                    else:
+                        ret.append(t.data)
+            else:
+                ret.append(item)
+        return ret
+
     def add_row(self, lineno, text):
         if text == ['']:
             text = [raw(' ')]
@@ -109,7 +135,9 @@
                                   html.td(class_='code', *text)))
 
     def __unicode__(self):
-        return self.html.unicode()
+        # XXX don't like to use indent=0 here, but else py.xml's indentation
+        # messes up the html inside the table cells (which displays formatting)
+        return self.html.unicode(indent=0)
 
 def create_html(mod):
     # out is some kind of stream

Modified: py/dist/py/apigen/source/testing/test_color.py
==============================================================================
--- py/dist/py/apigen/source/testing/test_color.py	(original)
+++ py/dist/py/apigen/source/testing/test_color.py	Tue Dec  5 16:32:01 2006
@@ -10,10 +10,31 @@
         assert self.tokens('foo') == [Token('foo', type='word')]
         assert self.tokens('_1_word') == [Token('_1_word', type='word')]
 
+    def test_keyword(self):
+        assert 'if' in PythonSchema.keyword
+        assert self.tokens('see if it works') == [Token('see', type='word'),
+                                                  Token(' ',
+                                                        type='whitespace'),
+                                                  Token('if', type='keyword'),
+                                                  Token(' ',
+                                                        type='whitespace'),
+                                                  Token('it', type='word'),
+                                                  Token(' ',
+                                                        type='whitespace'),
+                                                  Token('works', type='word')]
+
     def test_space(self):
         assert self.tokens(' ') == [Token(' ', type='whitespace')]
         assert self.tokens(' \n') == [Token(' \n', type='whitespace')]
 
+    def test_number(self):
+        # XXX incomplete
+        assert self.tokens('1') == [Token('1', type='number')]
+        assert self.tokens('1.1') == [Token('1.1', type='number')]
+        assert self.tokens('.1') == [Token('.1', type='number')]
+        assert self.tokens('1.') == [Token('1.', type='number')]
+        assert self.tokens('1.1l') == [Token('1.1l', type='number')]
+
     def test_printable(self):
         assert self.tokens('.') == [Token('.', 'unknown')]
         assert self.tokens(';#$@\n') == [Token(';#$@', type='unknown'),
@@ -34,14 +55,18 @@
         py.test.skip('not yet implemented')
         assert self.tokens('"foo \\" bar"') == [Token('"foo \\" bar"',
                                                       type='string')]
+
     def test_string_multiline(self):
         t = Tokenizer(PythonSchema)
         res = list(t.tokenize('"""foo\n'))
         assert res == [Token('"""foo\n', type='string')]
         res = list(t.tokenize('bar\n'))
-        print res
         assert res == [Token('bar\n', type='string')]
         res = list(t.tokenize('"""\n'))
         assert res == [Token('"""', type='string'),
                        Token('\n', type='whitespace')]
+        # tricky problem: the following line must not put the tokenizer in
+        # 'multiline state'...
+        assert self.tokens('"""foo"""') == [Token('"""foo"""', type='string')]
+        assert self.tokens('bar') == [Token('bar', type='word')]
 

Modified: py/dist/py/apigen/source/testing/test_html.py
==============================================================================
--- py/dist/py/apigen/source/testing/test_html.py	(original)
+++ py/dist/py/apigen/source/testing/test_html.py	Tue Dec  5 16:32:01 2006
@@ -72,6 +72,49 @@
         assert isinstance(tbody, html.tbody)
         assert tbody == table[0]
 
+    def prepare_line(self, line, doc=None):
+        if doc is None:
+            doc = HTMLDocument()
+        l = doc.prepare_line(line)
+        return ''.join(unicode(i) for i in l)
+
+    def test_prepare_line_basic(self):
+        result = self.prepare_line(['see if this works'])
+        assert result == 'see <span class="keyword">if</span> this works'
+        result = self.prepare_line(['see if this ',
+                                    html.a('works', name='works'),' too'])
+        assert result == ('see <span class="keyword">if</span> this '
+                          '<a name="works">works</a> too')
+        result = self.prepare_line(['see if something else works'])
+        assert result == ('see <span class="keyword">if</span> something '
+                          '<span class="keyword">else</span> works')
+        result = self.prepare_line(['see if something ',
+                                    html.a('else', name='else'), ' works too'])
+        assert result == ('see <span class="keyword">if</span> something '
+                          '<a name="else">else</a> works too')
+
+    def test_prepare_line_strings(self):
+        result = self.prepare_line(['foo = "bar"'])
+        assert result == 'foo = <span class="string">"bar"</span>'
+
+        result = self.prepare_line(['"spam"'])
+        assert result == '<span class="string">"spam"</span>'
+        
+        # test multiline strings
+        doc = HTMLDocument()
+        result = self.prepare_line(['"""start of multiline'], doc)
+        assert result == ('<span class="string">"""start of '
+                          'multiline</span>')
+        # doc should now be in 'string mode'
+        result = self.prepare_line(['see if it doesn\'t touch this'], doc)
+        assert result == ('<span class="string">see if it doesn\'t touch '
+                          'this</span>')
+        result = self.prepare_line(['"""'], doc)
+        assert result == '<span class="string">"""</span>'
+        result = self.prepare_line(['see if it colours this again'], doc)
+        assert result == ('see <span class="keyword">if</span> it colours '
+                          'this again')
+
     def test_add_row(self):
         doc = HTMLDocument()
         doc.add_row(1, ['""" this is a foo implementation """'])
@@ -92,6 +135,6 @@
         doc = HTMLDocument()
         h = unicode(doc)
         print h
-        assert py.std.re.match(r'<html>\s+<head>\s+<title>[^<]+</title>'
+        assert py.std.re.match(r'<html>\s*<head>\s*<title>[^<]+</title>'
                                 '.*</body>\w*</html>$', h, py.std.re.S)
 



More information about the pytest-commit mailing list