[py-svn] r35305 - in py/dist/py/apigen/source: . testing
guido at codespeak.net
guido at codespeak.net
Tue Dec 5 16:32:03 CET 2006
Author: guido
Date: Tue Dec 5 16:32:01 2006
New Revision: 35305
Modified:
py/dist/py/apigen/source/color.py
py/dist/py/apigen/source/html.py
py/dist/py/apigen/source/testing/test_color.py
py/dist/py/apigen/source/testing/test_html.py
Log:
Fixed some small bugs in the tokenizer, added a method to HTMLDocument that
uses the tokenizer to colour lines (although not in use yet).
Modified: py/dist/py/apigen/source/color.py
==============================================================================
--- py/dist/py/apigen/source/color.py (original)
+++ py/dist/py/apigen/source/color.py Tue Dec 5 16:32:01 2006
@@ -33,12 +33,12 @@
class Tokenizer(object):
""" when fed lists strings, it will return tokens with type info
- very simple tokenizer, state is recorded for multi-line strings, etc.
+ very naive tokenizer, state is recorded for multi-line strings, etc.
"""
_re_word = re.compile('[\w_]+')
_re_space = re.compile('\s+')
- _re_number = re.compile('[\d\.]*\d+')
+ _re_number = re.compile('[\d\.]*\d[\d\.]*l?', re.I)
_re_rest = re.compile('[^\w\s\d]+')
# these will be filled using the schema
@@ -75,8 +75,8 @@
yield Token(s, 'string')
while data:
for f in [self._check_multiline_strings, self._check_full_strings,
- self._check_comments, self._check_word,
- self._check_space, self._check_number, self._check_rest]:
+ self._check_comments, self._check_number,
+ self._check_space, self._check_word, self._check_rest]:
data, t = f(data)
if t:
yield t
@@ -124,7 +124,12 @@
m = self._re_word.match(data)
if m:
s = m.group(0)
- return data[len(s):], Token(s, 'word')
+ type = 'word'
+ if s in self.schema.keyword:
+ type = 'keyword'
+ elif s in self.schema.alt_keyword:
+ type = 'alt_keyword'
+ return data[len(s):], Token(s, type)
return data, None
def _check_space(self, data):
@@ -148,3 +153,4 @@
return data[len(s):], Token(s, 'unknown')
return data, None
+
Modified: py/dist/py/apigen/source/html.py
==============================================================================
--- py/dist/py/apigen/source/html.py (original)
+++ py/dist/py/apigen/source/html.py Tue Dec 5 16:32:01 2006
@@ -4,6 +4,7 @@
from py.xml import html, raw
from compiler import ast
+from py.__.apigen.source.color import Tokenizer, PythonSchema
class HtmlEnchanter(object):
reserved_words = {}
@@ -36,8 +37,9 @@
pos = row.find(item.name)
assert pos != -1
end = len(item.name) + pos
- return self.colors(row[:pos]) + [html.a(row[pos:end], href="#" + item.name,
- name=item.name)] + self.colors(row[end:])
+ chunk = [html.a(row[pos:end], href="#" + item.name,
+ name=item.name)]
+ return self.colors(row[:pos]) + chunk + self.colors(row[end:])
except KeyError:
return self.colors(row) # no more info
@@ -52,7 +54,7 @@
return output
class HTMLDocument(object):
- def __init__(self):
+ def __init__(self, tokenizer=None):
self.html = root = html.html()
self.head = head = self.create_head()
root.append(head)
@@ -61,6 +63,10 @@
self.table, self.tbody = table, tbody = self.create_table()
body.append(table)
+ if tokenizer is None:
+ tokenizer = Tokenizer(PythonSchema)
+ self.tokenizer = tokenizer
+
def create_head(self):
return html.head(
html.title('source view'),
@@ -102,6 +108,26 @@
table.append(tbody)
return table, tbody
+ def prepare_line(self, text):
+ """ adds html formatting to text items (list)
+
+ only processes items if they're of a string type (or unicode)
+ """
+ ret = []
+ for item in text:
+ if type(item) in [str, unicode]:
+ tokens = self.tokenizer.tokenize(item)
+ for t in tokens:
+ print t.type
+ if t.type in ['keyword', 'alt_keyword', 'number',
+ 'string']:
+ ret.append(html.span(t.data, class_=t.type))
+ else:
+ ret.append(t.data)
+ else:
+ ret.append(item)
+ return ret
+
def add_row(self, lineno, text):
if text == ['']:
text = [raw(' ')]
@@ -109,7 +135,9 @@
html.td(class_='code', *text)))
def __unicode__(self):
- return self.html.unicode()
+ # XXX don't like to use indent=0 here, but else py.xml's indentation
+ # messes up the html inside the table cells (which displays formatting)
+ return self.html.unicode(indent=0)
def create_html(mod):
# out is some kind of stream
Modified: py/dist/py/apigen/source/testing/test_color.py
==============================================================================
--- py/dist/py/apigen/source/testing/test_color.py (original)
+++ py/dist/py/apigen/source/testing/test_color.py Tue Dec 5 16:32:01 2006
@@ -10,10 +10,31 @@
assert self.tokens('foo') == [Token('foo', type='word')]
assert self.tokens('_1_word') == [Token('_1_word', type='word')]
+ def test_keyword(self):
+ assert 'if' in PythonSchema.keyword
+ assert self.tokens('see if it works') == [Token('see', type='word'),
+ Token(' ',
+ type='whitespace'),
+ Token('if', type='keyword'),
+ Token(' ',
+ type='whitespace'),
+ Token('it', type='word'),
+ Token(' ',
+ type='whitespace'),
+ Token('works', type='word')]
+
def test_space(self):
assert self.tokens(' ') == [Token(' ', type='whitespace')]
assert self.tokens(' \n') == [Token(' \n', type='whitespace')]
+ def test_number(self):
+ # XXX incomplete
+ assert self.tokens('1') == [Token('1', type='number')]
+ assert self.tokens('1.1') == [Token('1.1', type='number')]
+ assert self.tokens('.1') == [Token('.1', type='number')]
+ assert self.tokens('1.') == [Token('1.', type='number')]
+ assert self.tokens('1.1l') == [Token('1.1l', type='number')]
+
def test_printable(self):
assert self.tokens('.') == [Token('.', 'unknown')]
assert self.tokens(';#$@\n') == [Token(';#$@', type='unknown'),
@@ -34,14 +55,18 @@
py.test.skip('not yet implemented')
assert self.tokens('"foo \\" bar"') == [Token('"foo \\" bar"',
type='string')]
+
def test_string_multiline(self):
t = Tokenizer(PythonSchema)
res = list(t.tokenize('"""foo\n'))
assert res == [Token('"""foo\n', type='string')]
res = list(t.tokenize('bar\n'))
- print res
assert res == [Token('bar\n', type='string')]
res = list(t.tokenize('"""\n'))
assert res == [Token('"""', type='string'),
Token('\n', type='whitespace')]
+ # tricky problem: the following line must not put the tokenizer in
+ # 'multiline state'...
+ assert self.tokens('"""foo"""') == [Token('"""foo"""', type='string')]
+ assert self.tokens('bar') == [Token('bar', type='word')]
Modified: py/dist/py/apigen/source/testing/test_html.py
==============================================================================
--- py/dist/py/apigen/source/testing/test_html.py (original)
+++ py/dist/py/apigen/source/testing/test_html.py Tue Dec 5 16:32:01 2006
@@ -72,6 +72,49 @@
assert isinstance(tbody, html.tbody)
assert tbody == table[0]
+ def prepare_line(self, line, doc=None):
+ if doc is None:
+ doc = HTMLDocument()
+ l = doc.prepare_line(line)
+ return ''.join(unicode(i) for i in l)
+
+ def test_prepare_line_basic(self):
+ result = self.prepare_line(['see if this works'])
+ assert result == 'see <span class="keyword">if</span> this works'
+ result = self.prepare_line(['see if this ',
+ html.a('works', name='works'),' too'])
+ assert result == ('see <span class="keyword">if</span> this '
+ '<a name="works">works</a> too')
+ result = self.prepare_line(['see if something else works'])
+ assert result == ('see <span class="keyword">if</span> something '
+ '<span class="keyword">else</span> works')
+ result = self.prepare_line(['see if something ',
+ html.a('else', name='else'), ' works too'])
+ assert result == ('see <span class="keyword">if</span> something '
+ '<a name="else">else</a> works too')
+
+ def test_prepare_line_strings(self):
+ result = self.prepare_line(['foo = "bar"'])
+ assert result == 'foo = <span class="string">"bar"</span>'
+
+ result = self.prepare_line(['"spam"'])
+ assert result == '<span class="string">"spam"</span>'
+
+ # test multiline strings
+ doc = HTMLDocument()
+ result = self.prepare_line(['"""start of multiline'], doc)
+ assert result == ('<span class="string">"""start of '
+ 'multiline</span>')
+ # doc should now be in 'string mode'
+ result = self.prepare_line(['see if it doesn\'t touch this'], doc)
+ assert result == ('<span class="string">see if it doesn\'t touch '
+ 'this</span>')
+ result = self.prepare_line(['"""'], doc)
+ assert result == '<span class="string">"""</span>'
+ result = self.prepare_line(['see if it colours this again'], doc)
+ assert result == ('see <span class="keyword">if</span> it colours '
+ 'this again')
+
def test_add_row(self):
doc = HTMLDocument()
doc.add_row(1, ['""" this is a foo implementation """'])
@@ -92,6 +135,6 @@
doc = HTMLDocument()
h = unicode(doc)
print h
- assert py.std.re.match(r'<html>\s+<head>\s+<title>[^<]+</title>'
+ assert py.std.re.match(r'<html>\s*<head>\s*<title>[^<]+</title>'
'.*</body>\w*</html>$', h, py.std.re.S)
More information about the pytest-commit
mailing list