[pypy-svn] r48466 - in pypy/dist/pypy: annotation annotation/test objspace objspace/flow/test objspace/std/test rpython rpython/lltypesystem rpython/module rpython/ootypesystem rpython/ootypesystem/test rpython/test translator/test
fijal at codespeak.net
fijal at codespeak.net
Fri Nov 9 13:53:05 CET 2007
Author: fijal
Date: Fri Nov 9 13:53:02 2007
New Revision: 48466
Modified:
pypy/dist/pypy/annotation/binaryop.py
pypy/dist/pypy/annotation/model.py
pypy/dist/pypy/annotation/signature.py
pypy/dist/pypy/annotation/test/test_annrpython.py
pypy/dist/pypy/annotation/unaryop.py
pypy/dist/pypy/objspace/descroperation.py
pypy/dist/pypy/objspace/flow/test/test_objspace.py
pypy/dist/pypy/objspace/std/test/test_unicodeobject.py
pypy/dist/pypy/rpython/extfunctable.py
pypy/dist/pypy/rpython/llinterp.py
pypy/dist/pypy/rpython/lltypesystem/rstr.py
pypy/dist/pypy/rpython/module/support.py
pypy/dist/pypy/rpython/ootypesystem/ooregistry.py
pypy/dist/pypy/rpython/ootypesystem/ootype.py
pypy/dist/pypy/rpython/ootypesystem/rstr.py
pypy/dist/pypy/rpython/ootypesystem/test/test_ooann.py
pypy/dist/pypy/rpython/rbuiltin.py
pypy/dist/pypy/rpython/rstr.py
pypy/dist/pypy/rpython/test/test_rstr.py
pypy/dist/pypy/rpython/test/test_runicode.py
pypy/dist/pypy/rpython/test/tool.py
pypy/dist/pypy/translator/test/test_geninterp.py
Log:
(fijal, cfbolz, antocuni) Merge branch of rpython-unicode
------------------------------------------------------------------------
r48463 | cfbolz | 2007-11-09 11:26:22 +0100 (Fri, 09 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
unicode.decode doesn't make any sense! it should be unicode.encode
------------------------------------------------------------------------
r48461 | cfbolz | 2007-11-09 10:27:42 +0100 (Fri, 09 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
a failing test about unicode conversion raising an exception
------------------------------------------------------------------------
r48458 | fijal | 2007-11-08 23:37:41 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
Yay! Turn on ootypesystem tests, as they're passing
------------------------------------------------------------------------
r48457 | fijal | 2007-11-08 23:24:18 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ooregistry.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ootype.py
Some more tests passing
------------------------------------------------------------------------
r48456 | cfbolz | 2007-11-08 22:55:54 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
more tests passing
------------------------------------------------------------------------
r48455 | cfbolz | 2007-11-08 22:50:47 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/rbuiltin.py
at least this is needed
------------------------------------------------------------------------
r48454 | cfbolz | 2007-11-08 22:47:04 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/flow/test/test_objspace.py
skip this for now
------------------------------------------------------------------------
r48453 | cfbolz | 2007-11-08 22:46:01 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/flow/operation.py
remove this too
------------------------------------------------------------------------
r48452 | fijal | 2007-11-08 22:39:48 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/flow/specialcase.py
M /pypy/branch/pypy-rpython-unicode/objspace/flow/test/test_objspace.py
Revert 48449
------------------------------------------------------------------------
r48451 | cfbolz | 2007-11-08 22:38:11 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/interpreter/baseobjspace.py
remove unicode and unichr from the list of base space operations
------------------------------------------------------------------------
r48450 | fijal | 2007-11-08 22:38:00 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
M /pypy/branch/pypy-rpython-unicode/objspace/flow/operation.py
Remove unichr as spaceop
------------------------------------------------------------------------
r48449 | fijal | 2007-11-08 22:34:54 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/flow/specialcase.py
M /pypy/branch/pypy-rpython-unicode/objspace/flow/test/test_objspace.py
Remove unichr as spaceop
------------------------------------------------------------------------
r48448 | fijal | 2007-11-08 22:14:18 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
Create proper str2unicode
------------------------------------------------------------------------
r48447 | fijal | 2007-11-08 22:07:25 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/descroperation.py
Remove unichr() and unicode() from descroperation
------------------------------------------------------------------------
r48444 | fijal | 2007-11-08 21:19:53 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ootype.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
Squash a few failures; now 7 out of 13 tests are failing
------------------------------------------------------------------------
r48439 | fijal | 2007-11-08 20:55:03 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
that was easy :)
------------------------------------------------------------------------
r48435 | cfbolz | 2007-11-08 20:50:14 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/translator/test/test_geninterp.py
test for geninterp unicode behaviour
------------------------------------------------------------------------
r48433 | antocuni | 2007-11-08 20:43:50 +0100 (Thu, 08 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/llinterp.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ooregistry.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ootype.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
more work towards ootype unicode
------------------------------------------------------------------------
r48428 | antocuni | 2007-11-08 20:16:06 +0100 (Thu, 08 Nov 2007) | 4 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ootype.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/test/test_ooann.py
fix this test; the problem was the _defl method of AbstractString,
which shouldn't have been there.
------------------------------------------------------------------------
r48427 | fijal | 2007-11-08 19:04:22 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/std/test/test_unicodeobject.py
Skipped test that should work
------------------------------------------------------------------------
r48426 | fijal | 2007-11-08 18:43:51 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/flow/test/test_objspace.py
Fix test
------------------------------------------------------------------------
r48425 | antocuni | 2007-11-08 18:39:23 +0100 (Thu, 08 Nov 2007) | 5 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ootype.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/test/test_ooann.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/tool.py
first step to add Unicode support to ootype; it's not completed
because of a blocker I could not solve yet; see the failing test in
ootypesystem/test/test_ooann.py
------------------------------------------------------------------------
r48418 | fijal | 2007-11-08 16:08:03 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
unicode.decode('ascii')
------------------------------------------------------------------------
r48417 | fijal | 2007-11-08 16:02:00 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
One more case...
------------------------------------------------------------------------
r48416 | fijal | 2007-11-08 15:52:44 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
ll_str for unicode
------------------------------------------------------------------------
r48415 | fijal | 2007-11-08 15:39:32 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
Test str(unicodeconstant) is constant propagated and works out of the box.
------------------------------------------------------------------------
r48414 | fijal | 2007-11-08 14:31:36 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
This is no longer needed I think
------------------------------------------------------------------------
r48411 | fijal | 2007-11-08 14:20:32 +0100 (Thu, 08 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
Shuffle tests around, so unichar tests are in test_runicode now, not in
test_rstr.
------------------------------------------------------------------------
r48410 | antocuni | 2007-11-08 14:20:11 +0100 (Thu, 08 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
remove outdated comment
------------------------------------------------------------------------
r48408 | cfbolz | 2007-11-08 13:46:19 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/ootype.py
introduce Unicode. check in to have Anto take over.
------------------------------------------------------------------------
r48388 | fijal | 2007-11-08 01:55:43 +0100 (Thu, 08 Nov 2007) | 6 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/binaryop.py
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
All string tests that we have decided to support for now pass
with unicode (for lltypesystem). There are a few todos:
* fix the if typeOf(...) evil hacks
* think about unicode mod sth
* think about str(some_unicode)
------------------------------------------------------------------------
r48386 | fijal | 2007-11-08 01:43:13 +0100 (Thu, 08 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
First round of simplifications and getting rid of ugly typeOf switch
------------------------------------------------------------------------
r48374 | fijal | 2007-11-07 23:19:32 +0100 (Wed, 07 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
* fix test
* a bit of code simplification
------------------------------------------------------------------------
r48372 | fijal | 2007-11-07 22:05:57 +0100 (Wed, 07 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/ootypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
Support for most methods which I think should be supported for unicode
(right now). Still roughly 12 tests missing.
------------------------------------------------------------------------
r48367 | fijal | 2007-11-07 18:00:44 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/module/support.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/tool.py
Whack, whack... more tests are passing
------------------------------------------------------------------------
r48366 | fijal | 2007-11-07 18:00:28 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/binaryop.py
Allow unichr * integer
------------------------------------------------------------------------
r48365 | fijal | 2007-11-07 16:23:07 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
One test passing, one skipped
------------------------------------------------------------------------
r48364 | fijal | 2007-11-07 16:15:08 +0100 (Wed, 07 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
Eventual future support for unichar.isxxx, right now disabled (+ disable
test)
------------------------------------------------------------------------
r48363 | fijal | 2007-11-07 16:14:37 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
Remove unnecessary method (it's on SomeObject anyway)
------------------------------------------------------------------------
r48362 | fijal | 2007-11-07 16:04:57 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
unichr -> unicode translation
------------------------------------------------------------------------
r48360 | fijal | 2007-11-07 14:48:26 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
Iteration over unicode strings works.
------------------------------------------------------------------------
r48359 | fijal | 2007-11-07 14:46:16 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/model.py
M /pypy/branch/pypy-rpython-unicode/annotation/test/test_annrpython.py
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
Fix the bug where iteration over unicode returned chars
------------------------------------------------------------------------
r48357 | fijal | 2007-11-07 14:22:06 +0100 (Wed, 07 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/model.py
M /pypy/branch/pypy-rpython-unicode/annotation/test/test_annrpython.py
Make test pass (thanks carl, that was stupid)
------------------------------------------------------------------------
r48356 | fijal | 2007-11-07 14:02:11 +0100 (Wed, 07 Nov 2007) | 4 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/model.py
M /pypy/branch/pypy-rpython-unicode/annotation/signature.py
M /pypy/branch/pypy-rpython-unicode/annotation/test/test_annrpython.py
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
* Relax somewhat return values from method_xxx of SomeString, as SomeChar
should return string, not self
* Try to make the same work on SomeUnicode; it does not work yet
------------------------------------------------------------------------
r48341 | fijal | 2007-11-06 17:10:38 +0100 (Tue, 06 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/test/test_annrpython.py
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
Expose string methods to unicode strings
------------------------------------------------------------------------
r48336 | fijal | 2007-11-06 13:24:09 +0100 (Tue, 06 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
simplify code, as mixing unicode and strings are forbidden
------------------------------------------------------------------------
r48335 | fijal | 2007-11-06 13:19:17 +0100 (Tue, 06 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/binaryop.py
M /pypy/branch/pypy-rpython-unicode/annotation/test/test_annrpython.py
Forbid mixing unicode and string (this leaves a design flaw around,
as mixing of unichr and chr is ok)
------------------------------------------------------------------------
r48334 | fijal | 2007-11-06 12:42:26 +0100 (Tue, 06 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
Simple version works as well
------------------------------------------------------------------------
r48333 | fijal | 2007-11-06 12:39:37 +0100 (Tue, 06 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
Make the test mix unicode and str more
------------------------------------------------------------------------
r48332 | fijal | 2007-11-06 12:38:02 +0100 (Tue, 06 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/rpython/lltypesystem/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_rstr.py
M /pypy/branch/pypy-rpython-unicode/rpython/test/test_runicode.py
Some support for unicode strings. This is work in progress, not all tests
are really testing what they should.
------------------------------------------------------------------------
r48329 | fijal | 2007-11-06 10:51:36 +0100 (Tue, 06 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/binaryop.py
Ooops, one annotation too far
------------------------------------------------------------------------
r48328 | fijal | 2007-11-06 01:08:34 +0100 (Tue, 06 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/annotation/binaryop.py
M /pypy/branch/pypy-rpython-unicode/annotation/test/test_annrpython.py
M /pypy/branch/pypy-rpython-unicode/annotation/unaryop.py
Proper support for unicode() and unichr() operations
------------------------------------------------------------------------
r48327 | fijal | 2007-11-06 00:54:49 +0100 (Tue, 06 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/objspace/descroperation.py
Dummy descroperations, because otherwise something complains about lack
of those
------------------------------------------------------------------------
r48326 | fijal | 2007-11-05 21:07:56 +0100 (Mon, 05 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/interpreter/baseobjspace.py
M /pypy/branch/pypy-rpython-unicode/objspace/flow/operation.py
M /pypy/branch/pypy-rpython-unicode/objspace/flow/test/test_objspace.py
Constant folding for unicode as well
------------------------------------------------------------------------
r48325 | fijal | 2007-11-05 21:05:56 +0100 (Mon, 05 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/pypy-rpython-unicode/interpreter/baseobjspace.py
M /pypy/branch/pypy-rpython-unicode/objspace/flow/operation.py
M /pypy/branch/pypy-rpython-unicode/objspace/flow/test/test_objspace.py
Support for constant-folding of unichr
------------------------------------------------------------------------
r48315 | fijal | 2007-11-05 12:25:54 +0100 (Mon, 05 Nov 2007) | 2 lines
Changed paths:
A /pypy/branch/pypy-rpython-unicode (from /pypy/dist/pypy:48314)
Create new branch for rpython unicode support
Modified: pypy/dist/pypy/annotation/binaryop.py
==============================================================================
--- pypy/dist/pypy/annotation/binaryop.py (original)
+++ pypy/dist/pypy/annotation/binaryop.py Fri Nov 9 13:53:02 2007
@@ -426,13 +426,18 @@
return SomeChar()
-class __extend__(pairtype(SomeUnicodeCodePoint, SomeUnicodeCodePoint),
- pairtype(SomeChar, SomeUnicodeCodePoint),
+class __extend__(pairtype(SomeChar, SomeUnicodeCodePoint),
pairtype(SomeUnicodeCodePoint, SomeChar)):
+ def union((uchr1, uchr2)):
+ return SomeUnicodeCodePoint()
+class __extend__(pairtype(SomeUnicodeCodePoint, SomeUnicodeCodePoint)):
def union((uchr1, uchr2)):
return SomeUnicodeCodePoint()
+ def add((chr1, chr2)):
+ return SomeUnicodeString()
+
class __extend__(pairtype(SomeString, SomeObject)):
def mod((str, args)):
@@ -610,10 +615,11 @@
lst1.listdef.resize()
delitem.can_only_throw = []
-class __extend__(pairtype(SomeString, SomeSlice)):
+class __extend__(pairtype(SomeString, SomeSlice),
+ pairtype(SomeUnicodeString, SomeSlice)):
def getitem((str1, slic)):
- return SomeString()
+ return str1.basestringclass()
getitem.can_only_throw = []
class __extend__(pairtype(SomeString, SomeInteger)):
@@ -651,23 +657,30 @@
getitem_idx_key = getitem_idx
- # uncomment if we really want to support that
- #def mul((str1, int2)): # xxx do we want to support this
- # getbookkeeper().count("str_mul", str1, int2)
- # return SomeString()
+ def mul((str1, int2)): # xxx do we want to support this
+ getbookkeeper().count("str_mul", str1, int2)
+ return SomeUnicodeString()
-class __extend__(pairtype(SomeInteger, SomeString)):
+class __extend__(pairtype(SomeInteger, SomeString),
+ pairtype(SomeInteger, SomeUnicodeString)):
def mul((int1, str2)): # xxx do we want to support this
getbookkeeper().count("str_mul", str2, int1)
- return SomeString()
+ return str2.basestringclass()
-class __extend__(pairtype(SomeString, SomeUnicodeString),
- pairtype(SomeUnicodeString, SomeString),
+class __extend__(pairtype(SomeUnicodeCodePoint, SomeUnicodeString),
+ pairtype(SomeUnicodeString, SomeUnicodeCodePoint),
pairtype(SomeUnicodeString, SomeUnicodeString)):
def union((str1, str2)):
- return SomeUnicodeString(can_be_None=str1.can_be_None or
- str2.can_be_None)
+ return SomeUnicodeString(can_be_None=str1.can_be_none() or
+ str2.can_be_none())
+
+ def add((str1, str2)):
+ # propagate const-ness to help getattr(obj, 'prefix' + const_name)
+ result = SomeUnicodeString()
+ if str1.is_immutable_constant() and str2.is_immutable_constant():
+ result.const = str1.const + str2.const
+ return result
class __extend__(pairtype(SomeInteger, SomeList)):
Modified: pypy/dist/pypy/annotation/model.py
==============================================================================
--- pypy/dist/pypy/annotation/model.py (original)
+++ pypy/dist/pypy/annotation/model.py Fri Nov 9 13:53:02 2007
@@ -220,14 +220,16 @@
class SomeChar(SomeString):
"Stands for an object known to be a string of length 1."
-class SomeUnicodeCodePoint(SomeObject):
+class SomeUnicodeCodePoint(SomeUnicodeString):
"Stands for an object known to be a unicode codepoint."
- knowntype = unicode
- immutable = True
-
def can_be_none(self):
return False
+SomeString.basestringclass = SomeString
+SomeString.basecharclass = SomeChar
+SomeUnicodeString.basestringclass = SomeUnicodeString
+SomeUnicodeString.basecharclass = SomeUnicodeCodePoint
+
class SomeList(SomeObject):
"Stands for a homogenous list of any length."
knowntype = list
Modified: pypy/dist/pypy/annotation/signature.py
==============================================================================
--- pypy/dist/pypy/annotation/signature.py (original)
+++ pypy/dist/pypy/annotation/signature.py Fri Nov 9 13:53:02 2007
@@ -3,7 +3,7 @@
from pypy.annotation.model import SomeBool, SomeInteger, SomeString,\
SomeFloat, SomeList, SomeDict, s_None, SomeExternalObject,\
SomeObject, SomeInstance, SomeTuple, lltype_to_annotation,\
- unionof
+ unionof, SomeUnicodeString
from pypy.annotation.classdef import ClassDef, InstanceSource
from pypy.annotation.listdef import ListDef, MOST_GENERAL_LISTDEF
from pypy.annotation.dictdef import DictDef, MOST_GENERAL_DICTDEF
@@ -80,6 +80,8 @@
return SomeFloat()
elif issubclass(t, str): # py.lib uses annotated str subclasses
return SomeString()
+ elif t is unicode:
+ return SomeUnicodeString()
elif t is list:
return SomeList(MOST_GENERAL_LISTDEF)
elif t is dict:
Modified: pypy/dist/pypy/annotation/test/test_annrpython.py
==============================================================================
--- pypy/dist/pypy/annotation/test/test_annrpython.py (original)
+++ pypy/dist/pypy/annotation/test/test_annrpython.py Fri Nov 9 13:53:02 2007
@@ -2883,7 +2883,7 @@
def test_unicode(self):
def g(n):
if n > 0:
- return "xxx"
+ return unichr(1234)
else:
return u"x\xe4x"
@@ -2906,6 +2906,62 @@
s = a.build_types(f, [str])
assert isinstance(s, annmodel.SomeUnicodeString)
+ def test_unicode_add(self):
+ def f(x):
+ return unicode(x) + unichr(1234)
+
+ def g(x):
+ return unichr(x) + unichr(2)
+
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [str])
+ assert isinstance(s, annmodel.SomeUnicodeString)
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [int])
+ assert isinstance(s, annmodel.SomeUnicodeString)
+
+ def test_unicode_startswith(self):
+ def f(x):
+ return u'xxxx'.replace(x, u'z')
+
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [unicode])
+ assert isinstance(s, annmodel.SomeUnicodeString)
+
+ def test_unicode_buildtypes(self):
+ def f(x):
+ return x
+
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [unicode])
+ assert isinstance(s, annmodel.SomeUnicodeString)
+
+ def test_replace_annotations(self):
+ def f(x):
+ return 'a'.replace(x, 'b')
+
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [str])
+ assert isinstance(s, annmodel.SomeString)
+
+ def f(x):
+ return u'a'.replace(x, u'b')
+
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [unicode])
+ assert isinstance(s, annmodel.SomeUnicodeString)
+
+ def test_unicode_char(self):
+ def f(x, i):
+ for c in x:
+ if c == i:
+ return c
+ return 'x'
+
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [unicode, str])
+ assert isinstance(s, annmodel.SomeUnicodeCodePoint)
+
def g(n):
return [0,1,2,n]
Modified: pypy/dist/pypy/annotation/unaryop.py
==============================================================================
--- pypy/dist/pypy/annotation/unaryop.py (original)
+++ pypy/dist/pypy/annotation/unaryop.py Fri Nov 9 13:53:02 2007
@@ -9,7 +9,7 @@
SomeExternalObject, SomeTypedAddressAccess, SomeAddress, \
SomeCTypesObject, s_ImpossibleValue, s_Bool, s_None, \
unionof, set, missing_operation, add_knowntypedata, HarmlesslyBlocked, \
- SomeGenericCallable, SomeWeakRef
+ SomeGenericCallable, SomeWeakRef, SomeUnicodeString
from pypy.annotation.bookkeeper import getbookkeeper
from pypy.annotation import builtin
from pypy.annotation.binaryop import _clone ## XXX where to put this?
@@ -25,7 +25,7 @@
'iter', 'next', 'invert', 'type', 'issubtype',
'pos', 'neg', 'nonzero', 'abs', 'hex', 'oct',
'ord', 'int', 'float', 'long', 'id',
- 'neg_ovf', 'abs_ovf', 'hint'])
+ 'neg_ovf', 'abs_ovf', 'hint', 'unicode', 'unichr'])
for opname in UNARY_OPERATIONS:
missing_operation(SomeObject, opname)
@@ -103,6 +103,10 @@
getbookkeeper().count('str', obj)
return SomeString()
+ def unicode(obj):
+ getbookkeeper().count('unicode', obj)
+ return SomeUnicodeString()
+
def repr(obj):
getbookkeeper().count('repr', obj)
return SomeString()
@@ -201,7 +205,6 @@
def invert(self):
return SomeInteger(knowntype=self.knowntype)
-
invert.can_only_throw = []
def pos(self):
@@ -402,7 +405,8 @@
return s_Bool
-class __extend__(SomeString):
+class __extend__(SomeString,
+ SomeUnicodeString):
def method_startswith(str, frag):
return s_Bool
@@ -420,27 +424,29 @@
return SomeInteger(nonneg=True)
def method_strip(str, chr):
- return SomeString()
+ return str.basestringclass()
def method_lstrip(str, chr):
- return SomeString()
+ return str.basestringclass()
def method_rstrip(str, chr):
- return SomeString()
+ return str.basestringclass()
def method_join(str, s_list):
getbookkeeper().count("str_join", str)
s_item = s_list.listdef.read_item()
if isinstance(s_item, SomeImpossibleValue):
+ if isinstance(str, SomeUnicodeString):
+ return immutablevalue(u"")
return immutablevalue("")
- return SomeString()
+ return str.basestringclass()
def iter(str):
return SomeIterator(str)
iter.can_only_throw = []
def getanyitem(str):
- return SomeChar()
+ return str.basecharclass()
def ord(str):
return SomeInteger(nonneg=True)
@@ -450,17 +456,36 @@
def method_split(str, patt): # XXX
getbookkeeper().count("str_split", str, patt)
- return getbookkeeper().newlist(SomeString())
+ return getbookkeeper().newlist(str.basestringclass())
def method_replace(str, s1, s2):
- return SomeString()
+ return str.basestringclass()
- def method_lower(str):
+class __extend__(SomeUnicodeString):
+ def method_encode(uni, s_enc):
+ if not s_enc.is_constant():
+ raise TypeError("Non-constant encoding not supported")
+ enc = s_enc.const
+ if enc != 'ascii':
+ raise TypeError("Encoding %s not supported for unicode" % (enc,))
return SomeString()
+ method_encode.can_only_throw = [UnicodeEncodeError]
+class __extend__(SomeString):
def method_upper(str):
return SomeString()
+ def method_lower(str):
+ return SomeString()
+
+ def method_decode(str, s_enc):
+ if not s_enc.is_constant():
+ raise TypeError("Non-constant encoding not supported")
+ enc = s_enc.const
+ if enc != 'ascii':
+ raise TypeError("Encoding %s not supported for strings" % (enc,))
+ return SomeUnicodeString()
+ method_decode.can_only_throw = [UnicodeDecodeError]
class __extend__(SomeChar):
@@ -485,12 +510,6 @@
def method_isupper(chr):
return s_Bool
-class __extend__(SomeUnicodeCodePoint):
-
- def ord(uchr):
- return SomeInteger(nonneg=True)
-
-
class __extend__(SomeIterator):
def iter(itr):
Modified: pypy/dist/pypy/objspace/descroperation.py
==============================================================================
--- pypy/dist/pypy/objspace/descroperation.py (original)
+++ pypy/dist/pypy/objspace/descroperation.py Fri Nov 9 13:53:02 2007
@@ -592,7 +592,7 @@
setattr(DescrOperation,_name,_impl_maker(_symbol,_specialnames))
elif _name not in ['is_', 'id','type','issubtype',
# not really to be defined in DescrOperation
- 'ord']:
+ 'ord', 'unichr', 'unicode']:
raise Exception, "missing def for operation %s" % _name
Modified: pypy/dist/pypy/objspace/flow/test/test_objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/flow/test/test_objspace.py (original)
+++ pypy/dist/pypy/objspace/flow/test/test_objspace.py Fri Nov 9 13:53:02 2007
@@ -712,6 +712,21 @@
return x[s]
graph = self.codetest(myfunc)
+ def test_unichr_constfold(self):
+ py.test.skip("not working")
+ def myfunc():
+ return unichr(1234)
+ graph = self.codetest(myfunc)
+ assert graph.startblock.exits[0].target is graph.returnblock
+
+ def test_unicode_constfold(self):
+ py.test.skip("not working for now")
+ def myfunc():
+ return unicode("1234")
+ graph = self.codetest(myfunc)
+ assert graph.startblock.exits[0].target is graph.returnblock
+
+
def test_getitem(self):
def f(c, x):
try:
Modified: pypy/dist/pypy/objspace/std/test/test_unicodeobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_unicodeobject.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_unicodeobject.py Fri Nov 9 13:53:02 2007
@@ -264,3 +264,17 @@
assert unicode(None) == u'None'
assert unicode(123) == u'123'
assert unicode([2, 3]) == u'[2, 3]'
+
+ def test_call_unicode(self):
+ skip("does not work")
+ class X:
+ def __unicode__(self):
+ return u'x'
+
+ try:
+ unicode(X(), 'ascii')
+ except TypeError, t:
+ assert 'need string or buffer' in str(t)
+ else:
+ raise Exception("DID NOT RAISE")
+
Modified: pypy/dist/pypy/rpython/extfunctable.py
==============================================================================
--- pypy/dist/pypy/rpython/extfunctable.py (original)
+++ pypy/dist/pypy/rpython/extfunctable.py Fri Nov 9 13:53:02 2007
@@ -196,5 +196,7 @@
IndexError : True,
AssertionError : True,
RuntimeError : True,
+ UnicodeDecodeError: True,
+ UnicodeEncodeError: True,
}
Modified: pypy/dist/pypy/rpython/llinterp.py
==============================================================================
--- pypy/dist/pypy/rpython/llinterp.py (original)
+++ pypy/dist/pypy/rpython/llinterp.py Fri Nov 9 13:53:02 2007
@@ -1086,6 +1086,9 @@
def op_oostring(self, obj, base):
return ootype.oostring(obj, base)
+ def op_oounicode(self, obj, base):
+ return ootype.oounicode(obj, base)
+
def op_ooparse_int(self, s, base):
try:
return ootype.ooparse_int(s, base)
Modified: pypy/dist/pypy/rpython/lltypesystem/rstr.py
==============================================================================
--- pypy/dist/pypy/rpython/lltypesystem/rstr.py (original)
+++ pypy/dist/pypy/rpython/lltypesystem/rstr.py Fri Nov 9 13:53:02 2007
@@ -8,12 +8,13 @@
from pypy.rpython.rmodel import inputconst, IntegerRepr
from pypy.rpython.rstr import AbstractStringRepr,AbstractCharRepr,\
AbstractUniCharRepr, AbstractStringIteratorRepr,\
- AbstractLLHelpers
+ AbstractLLHelpers, AbstractUnicodeRepr
from pypy.rpython.lltypesystem import ll_str
from pypy.rpython.lltypesystem.lltype import \
GcStruct, Signed, Array, Char, UniChar, Ptr, malloc, \
- Bool, Void, GcArray, nullptr, pyobjectptr, cast_primitive
-
+ Bool, Void, GcArray, nullptr, pyobjectptr, cast_primitive, typeOf,\
+ staticAdtMethod, GcForwardReference
+from pypy.rpython.rmodel import Repr
# ____________________________________________________________
#
@@ -24,19 +25,43 @@
# chars: array of Char
# }
-STR = GcStruct('rpy_string', ('hash', Signed),
- ('chars', Array(Char, hints={'immutable': True,
- 'isrpystring': True})))
-UNICODE = GcStruct('rpy_unicode', ('hash', Signed),
- ('chars', Array(UniChar, hints={'immutable': True})))
+STR = GcForwardReference()
+UNICODE = GcForwardReference()
+
+def new_malloc(TP):
+ def mallocstr(length):
+ debug_assert(length >= 0, "negative string length")
+ r = malloc(TP, length)
+ if not we_are_translated() or not malloc_zero_filled:
+ r.hash = 0
+ return r
+ mallocstr._annspecialcase_ = 'specialize:semierased'
+ return mallocstr
+
+mallocstr = new_malloc(STR)
+mallocunicode = new_malloc(UNICODE)
+
+def emptystrfun():
+ return emptystr
+
+def emptyunicodefun():
+ return emptyunicode
+
+STR.become(GcStruct('rpy_string', ('hash', Signed),
+ ('chars', Array(Char, hints={'immutable': True,
+ 'isrpystring': True})),
+ adtmeths={'malloc' : staticAdtMethod(mallocstr),
+ 'empty' : staticAdtMethod(emptystrfun)}))
+UNICODE.become(GcStruct('rpy_unicode', ('hash', Signed),
+ ('chars', Array(UniChar, hints={'immutable': True})),
+ adtmeths={'malloc' : staticAdtMethod(mallocunicode),
+ 'empty' : staticAdtMethod(emptyunicodefun)}
+ ))
SIGNED_ARRAY = GcArray(Signed)
CONST_STR_CACHE = WeakValueDictionary()
+CONST_UNICODE_CACHE = WeakValueDictionary()
-class BaseStringRepr(AbstractStringRepr):
- def __init__(self, *args):
- AbstractStringRepr.__init__(self, *args)
- self.ll = LLHelpers
-
+class BaseLLStringRepr(Repr):
def convert_const(self, value):
if value is None:
return nullptr(self.lowleveltype.TO)
@@ -44,20 +69,21 @@
if not isinstance(value, self.basetype):
raise TyperError("not a str: %r" % (value,))
try:
- return CONST_STR_CACHE[value]
+ return self.CACHE[value]
except KeyError:
p = self.malloc(len(value))
for i in range(len(value)):
p.chars[i] = cast_primitive(self.base, value[i])
p.hash = 0
self.ll.ll_strhash(p) # precompute the hash
- CONST_STR_CACHE[value] = p
+ self.CACHE[value] = p
return p
def make_iterator_repr(self):
- return string_iterator_repr
+ return self.iterator_repr
def can_ll_be_null(self, s_value):
+ # XXX unicode
if self is string_repr:
return s_value.can_be_none()
else:
@@ -71,28 +97,44 @@
v_items = hop.gendirectcall(LIST.ll_items, v_lst)
return v_length, v_items
-class StringRepr(BaseStringRepr):
+class StringRepr(BaseLLStringRepr, AbstractStringRepr):
lowleveltype = Ptr(STR)
basetype = str
base = Char
+ CACHE = CONST_STR_CACHE
def __init__(self, *args):
- BaseStringRepr.__init__(self, *args)
+ AbstractStringRepr.__init__(self, *args)
+ self.ll = LLHelpers
self.malloc = mallocstr
-class UnicodeRepr(BaseStringRepr):
+class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
lowleveltype = Ptr(UNICODE)
basetype = basestring
base = UniChar
+ CACHE = CONST_UNICODE_CACHE
def __init__(self, *args):
- BaseStringRepr.__init__(self, *args)
+ AbstractUnicodeRepr.__init__(self, *args)
+ self.ll = LLHelpers
self.malloc = mallocunicode
+ def ll_str(self, s):
+ # XXX crazy that this is here, but I don't want to break
+ # rmodel logic
+ lgt = len(s.chars)
+ result = mallocstr(lgt)
+ for i in range(lgt):
+ c = s.chars[i]
+ if ord(c) > 127:
+ raise UnicodeEncodeError("character not in ascii range")
+ result.chars[i] = cast_primitive(Char, c)
+ return result
+
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
-class UniCharRepr(AbstractUniCharRepr):
+class UniCharRepr(AbstractUniCharRepr, UnicodeRepr):
lowleveltype = UniChar
class __extend__(pairtype(PyObjRepr, AbstractStringRepr)):
@@ -119,19 +161,6 @@
resulttype=pyobj_repr,
_callable= lambda v: pyobjectptr(''.join(v.chars)))
-def new_malloc(TP):
- def mallocstr(length):
- debug_assert(length >= 0, "negative string length")
- r = malloc(TP, length)
- if not we_are_translated() or not malloc_zero_filled:
- r.hash = 0
- return r
- mallocstr._annspecialcase_ = 'specialize:semierased'
- return mallocstr
-
-mallocstr = new_malloc(STR)
-mallocunicode = new_malloc(UNICODE)
-
# ____________________________________________________________
#
# Low-level methods. These can be run for testing, but are meant to
@@ -163,9 +192,13 @@
class LLHelpers(AbstractLLHelpers):
def ll_char_mul(ch, times):
+ if typeOf(ch) is Char:
+ malloc = mallocstr
+ else:
+ malloc = mallocunicode
if times < 0:
times = 0
- newstr = mallocstr(times)
+ newstr = malloc(times)
j = 0
while j < times:
newstr.chars[j] = ch
@@ -183,10 +216,23 @@
ll_stritem_nonneg._annenforceargs_ = [None, int]
def ll_chr2str(ch):
- s = mallocstr(1)
+ if typeOf(ch) is Char:
+ malloc = mallocstr
+ else:
+ malloc = mallocunicode
+ s = malloc(1)
s.chars[0] = ch
return s
+ def ll_str2unicode(str):
+ lgt = len(str.chars)
+ s = mallocunicode(lgt)
+ for i in range(lgt):
+ if ord(str.chars[i]) > 127:
+ raise UnicodeDecodeError
+ s.chars[i] = cast_primitive(UniChar, str.chars[i])
+ return s
+
def ll_strhash(s):
# unlike CPython, there is no reason to avoid to return -1
# but our malloc initializes the memory to zero, so we use zero as the
@@ -204,7 +250,7 @@
def ll_strconcat(s1, s2):
len1 = len(s1.chars)
len2 = len(s2.chars)
- newstr = mallocstr(len1 + len2)
+ newstr = s1.malloc(len1 + len2)
j = 0
while j < len1:
newstr.chars[j] = s1.chars[j]
@@ -219,7 +265,7 @@
def ll_strip(s, ch, left, right):
s_len = len(s.chars)
if s_len == 0:
- return emptystr
+ return s.empty()
lpos = 0
rpos = s_len - 1
if left:
@@ -229,7 +275,7 @@
while lpos < rpos and s.chars[rpos] == ch:
rpos -= 1
r_len = rpos - lpos + 1
- result = mallocstr(r_len)
+ result = s.malloc(r_len)
i = 0
j = lpos
while i < r_len:
@@ -242,9 +288,9 @@
s_chars = s.chars
s_len = len(s_chars)
if s_len == 0:
- return emptystr
+ return s.empty()
i = 0
- result = mallocstr(s_len)
+ result = s.malloc(s_len)
while i < s_len:
ch = s_chars[i]
if 'a' <= ch <= 'z':
@@ -257,9 +303,9 @@
s_chars = s.chars
s_len = len(s_chars)
if s_len == 0:
- return emptystr
+ return s.empty()
i = 0
- result = mallocstr(s_len)
+ result = s.malloc(s_len)
while i < s_len:
ch = s_chars[i]
if 'A' <= ch <= 'Z':
@@ -273,13 +319,13 @@
s_len = len(s_chars)
num_items = length
if num_items == 0:
- return emptystr
+ return s.empty()
itemslen = 0
i = 0
while i < num_items:
itemslen += len(items[i].chars)
i += 1
- result = mallocstr(itemslen + s_len * (num_items - 1))
+ result = s.malloc(itemslen + s_len * (num_items - 1))
res_chars = result.chars
res_index = 0
i = 0
@@ -529,7 +575,11 @@
while i < num_items:
itemslen += len(items[i].chars)
i += 1
- result = mallocstr(itemslen)
+ if typeOf(items).TO.OF.TO == STR:
+ malloc = mallocstr
+ else:
+ malloc = mallocunicode
+ result = malloc(itemslen)
res_chars = result.chars
res_index = 0
i = 0
@@ -546,7 +596,11 @@
def ll_join_chars(length, chars):
num_chars = length
- result = mallocstr(num_chars)
+ if typeOf(chars).TO.OF == Char:
+ malloc = mallocstr
+ else:
+ malloc = mallocunicode
+ result = malloc(num_chars)
res_chars = result.chars
i = 0
while i < num_chars:
@@ -556,7 +610,7 @@
def ll_stringslice_startonly(s1, start):
len1 = len(s1.chars)
- newstr = mallocstr(len1 - start)
+ newstr = s1.malloc(len1 - start)
j = 0
while start < len1:
newstr.chars[j] = s1.chars[start]
@@ -571,7 +625,7 @@
if start == 0:
return s1
stop = len(s1.chars)
- newstr = mallocstr(stop - start)
+ newstr = s1.malloc(stop - start)
j = 0
while start < stop:
newstr.chars[j] = s1.chars[start]
@@ -581,7 +635,7 @@
def ll_stringslice_minusone(s1):
newlen = len(s1.chars) - 1
- newstr = mallocstr(newlen)
+ newstr = s1.malloc(newlen)
j = 0
while j < newlen:
newstr.chars[j] = s1.chars[j]
@@ -604,7 +658,7 @@
resindex = 0
while j < strlen:
if chars[j] == c:
- item = items[resindex] = mallocstr(j - i)
+ item = items[resindex] = s.malloc(j - i)
newchars = item.chars
k = i
while k < j:
@@ -613,7 +667,7 @@
resindex += 1
i = j + 1
j += 1
- item = items[resindex] = mallocstr(j - i)
+ item = items[resindex] = s.malloc(j - i)
newchars = item.chars
k = i
while k < j:
@@ -625,7 +679,7 @@
def ll_replace_chr_chr(s, c1, c2):
length = len(s.chars)
- newstr = mallocstr(length)
+ newstr = s.malloc(length)
src = s.chars
dst = newstr.chars
j = 0
@@ -782,22 +836,42 @@
unichar_repr.ll = LLHelpers
unicode_repr = UnicodeRepr()
emptystr = string_repr.convert_const("")
+emptyunicode = unicode_repr.convert_const(u'')
StringRepr.repr = string_repr
UnicodeRepr.repr = unicode_repr
+UniCharRepr.repr = unicode_repr
+UniCharRepr.char_repr = unichar_repr
+UnicodeRepr.char_repr = unichar_repr
+CharRepr.char_repr = char_repr
+StringRepr.char_repr = char_repr
+
+class BaseStringIteratorRepr(AbstractStringIteratorRepr):
-class StringIteratorRepr(AbstractStringIteratorRepr):
+ def __init__(self):
+ self.ll_striter = ll_striter
+ self.ll_strnext = ll_strnext
+class StringIteratorRepr(BaseStringIteratorRepr):
+
lowleveltype = Ptr(GcStruct('stringiter',
('string', string_repr.lowleveltype),
('index', Signed)))
- def __init__(self):
- self.ll_striter = ll_striter
- self.ll_strnext = ll_strnext
+class UnicodeIteratorRepr(BaseStringIteratorRepr):
+
+ lowleveltype = Ptr(GcStruct('unicodeiter',
+ ('string', unicode_repr.lowleveltype),
+ ('index', Signed)))
def ll_striter(string):
- iter = malloc(string_iterator_repr.lowleveltype.TO)
+ if typeOf(string) == string_repr.lowleveltype:
+ TP = string_repr.iterator_repr.lowleveltype.TO
+ elif typeOf(string) == unicode_repr.lowleveltype:
+ TP = unicode_repr.iterator_repr.lowleveltype.TO
+ else:
+ raise TypeError("Unknown string type %s" % (typeOf(string),))
+ iter = malloc(TP)
iter.string = string
iter.index = 0
return iter
@@ -810,8 +884,8 @@
iter.index = index + 1
return chars[index]
-string_iterator_repr = StringIteratorRepr()
-
+string_repr.iterator_repr = StringIteratorRepr()
+unicode_repr.iterator_repr = UnicodeIteratorRepr()
# these should be in rclass, but circular imports prevent (also it's
# not that insane that a string constant is built in this file).
Modified: pypy/dist/pypy/rpython/module/support.py
==============================================================================
--- pypy/dist/pypy/rpython/module/support.py (original)
+++ pypy/dist/pypy/rpython/module/support.py Fri Nov 9 13:53:02 2007
@@ -29,6 +29,16 @@
return p
to_rstr = staticmethod(to_rstr)
+ def to_runicode(s):
+ from pypy.rpython.lltypesystem.rstr import UNICODE, mallocunicode
+ if s is None:
+ return lltype.nullptr(UNICODE)
+ p = mallocunicode(len(s))
+ for i in range(len(s)):
+ p.chars[i] = s[i]
+ return p
+ to_runicode = staticmethod(to_runicode)
+
def from_rstr(rs):
if not rs: # null pointer
return None
Modified: pypy/dist/pypy/rpython/ootypesystem/ooregistry.py
==============================================================================
--- pypy/dist/pypy/rpython/ootypesystem/ooregistry.py (original)
+++ pypy/dist/pypy/rpython/ootypesystem/ooregistry.py Fri Nov 9 13:53:02 2007
@@ -24,6 +24,19 @@
vlist = hop.inputargs(hop.args_r[0], ootype.Signed)
return hop.genop('oostring', vlist, resulttype = ootype.String)
+class Entry_oounicode(ExtRegistryEntry):
+ _about_ = ootype.oounicode
+
+ def compute_result_annotation(self, obj_s, base_s):
+ assert isinstance(obj_s, annmodel.SomeUnicodeCodePoint)
+ assert isinstance(base_s, annmodel.SomeInteger)
+ return annmodel.SomeOOInstance(ootype.Unicode)
+
+ def specialize_call(self, hop):
+ assert isinstance(hop.args_s[0],annmodel.SomeUnicodeCodePoint)
+ vlist = hop.inputargs(hop.args_r[0], ootype.Signed)
+ return hop.genop('oounicode', vlist, resulttype = ootype.Unicode)
+
class Entry_ootype_string(ExtRegistryEntry):
_type_ = ootype._string
@@ -72,11 +85,13 @@
def compute_result_annotation(self, str_s):
assert isinstance(str_s, annmodel.SomeOOInstance)\
- and str_s.ootype is ootype.String
+ and (str_s.ootype is ootype.String or
+ str_s.ootype is ootype.Unicode)
return annmodel.SomeInteger()
def specialize_call(self, hop):
assert isinstance(hop.args_s[0], annmodel.SomeOOInstance)\
- and hop.args_s[0].ootype is ootype.String
+ and (hop.args_s[0].ootype is ootype.String or
+ hop.args_s[0].ootype is ootype.Unicode)
vlist = hop.inputargs(hop.args_r[0])
return hop.genop('oohash', vlist, resulttype=ootype.Signed)
Modified: pypy/dist/pypy/rpython/ootypesystem/ootype.py
==============================================================================
--- pypy/dist/pypy/rpython/ootypesystem/ootype.py (original)
+++ pypy/dist/pypy/rpython/ootypesystem/ootype.py Fri Nov 9 13:53:02 2007
@@ -322,16 +322,14 @@
return set()
-# WARNING: the name 'String' is rebound at the end of file
-class String(BuiltinADTType):
- SELFTYPE_T = object()
+class AbstractString(BuiltinADTType):
def __init__(self):
self._null = _null_string(self)
generic_types = { self.SELFTYPE_T: self }
self._GENERIC_METHODS = frozendict({
- "ll_stritem_nonneg": Meth([Signed], Char),
+ "ll_stritem_nonneg": Meth([Signed], self.CHAR),
"ll_strlen": Meth([], Signed),
"ll_strconcat": Meth([self.SELFTYPE_T], self.SELFTYPE_T),
"ll_streq": Meth([self.SELFTYPE_T], Bool),
@@ -341,47 +339,74 @@
"ll_find": Meth([self.SELFTYPE_T, Signed, Signed], Signed),
"ll_rfind": Meth([self.SELFTYPE_T, Signed, Signed], Signed),
"ll_count": Meth([self.SELFTYPE_T, Signed, Signed], Signed),
- "ll_find_char": Meth([Char, Signed, Signed], Signed),
- "ll_rfind_char": Meth([Char, Signed, Signed], Signed),
- "ll_count_char": Meth([Char, Signed, Signed], Signed),
- "ll_strip": Meth([Char, Bool, Bool], self.SELFTYPE_T),
+ "ll_find_char": Meth([self.CHAR, Signed, Signed], Signed),
+ "ll_rfind_char": Meth([self.CHAR, Signed, Signed], Signed),
+ "ll_count_char": Meth([self.CHAR, Signed, Signed], Signed),
+ "ll_strip": Meth([self.CHAR, Bool, Bool], self.SELFTYPE_T),
"ll_upper": Meth([], self.SELFTYPE_T),
"ll_lower": Meth([], self.SELFTYPE_T),
"ll_substring": Meth([Signed, Signed], self.SELFTYPE_T), # ll_substring(start, count)
- "ll_split_chr": Meth([Char], List(self.SELFTYPE_T)),
- "ll_contains": Meth([Char], Bool),
- "ll_replace_chr_chr": Meth([Char, Char], self.SELFTYPE_T),
+ "ll_split_chr": Meth([self.CHAR], List(self.SELFTYPE_T)),
+ "ll_contains": Meth([self.CHAR], Bool),
+ "ll_replace_chr_chr": Meth([self.CHAR, self.CHAR], self.SELFTYPE_T),
})
self._setup_methods(generic_types)
+ def _example(self):
+ return self._defl()
+
+ def _get_interp_class(self):
+ return _string
+
+ def _specialize(self, generic_types):
+ return self
+
+# WARNING: the name 'String' is rebound at the end of file
+class String(AbstractString):
+ SELFTYPE_T = object()
+ CHAR = Char
+
+ # TODO: should it return _null or ''?
+ def _defl(self):
+ return make_string('')
+
def _enforce(self, value):
+ # XXX share this with Unicode?
TYPE = typeOf(value)
- if TYPE == Char:
+ if TYPE == self.CHAR:
return make_string(value)
else:
return BuiltinADTType._enforce(self, value)
+
+# WARNING: the name 'Unicode' is rebound at the end of file
+class Unicode(AbstractString):
+ SELFTYPE_T = object()
+ CHAR = UniChar
+
# TODO: should it return _null or ''?
def _defl(self):
- return make_string("")
- def _example(self):
- return self._defl()
+ return make_unicode(u'')
+
+ def _enforce(self, value):
+ TYPE = typeOf(value)
+ if TYPE == self.CHAR:
+ return make_unicode(value)
+ else:
+ return BuiltinADTType._enforce(self, value)
+
- def _get_interp_class(self):
- return _string
- def _specialize(self, generic_types):
- return self
# WARNING: the name 'StringBuilder' is rebound at the end of file
class StringBuilder(BuiltinADTType):
- def __init__(self):
+ def __init__(self, STRINGTP, CHARTP):
self._null = _null_string_builder(self)
self._GENERIC_METHODS = frozendict({
"ll_allocate": Meth([Signed], Void),
- "ll_append_char": Meth([Char], Void),
- "ll_append": Meth([String], Void),
- "ll_build": Meth([], String),
+ "ll_append_char": Meth([CHARTP], Void),
+ "ll_append": Meth([STRINGTP], Void),
+ "ll_build": Meth([], STRINGTP),
})
self._setup_methods({})
@@ -837,6 +862,10 @@
assert isinstance(value, str)
return _string(String, value)
+def make_unicode(value):
+ assert isinstance(value, unicode)
+ return _string(Unicode, value)
+
def make_instance(INSTANCE):
inst = _instance(INSTANCE)
if STATICNESS:
@@ -1049,6 +1078,14 @@
def __cmp__(self, other):
return cmp(self._str, other._str)
+ def make_string(self, value):
+ if self._TYPE is String:
+ return make_string(value)
+ elif self._TYPE is Unicode:
+ return make_unicode(value)
+ else:
+ assert False, 'Unknown type %s' % self._TYPE
+
def ll_stritem_nonneg(self, i):
# NOT_RPYTHON
s = self._str
@@ -1061,7 +1098,7 @@
def ll_strconcat(self, s):
# NOT_RPYTHON
- return make_string(self._str + s._str)
+ return self.make_string(self._str + s._str)
def ll_streq(self, s):
# NOT_RPYTON
@@ -1110,24 +1147,24 @@
s = s.lstrip(ch)
if right:
s = s.rstrip(ch)
- return make_string(s)
+ return self.make_string(s)
def ll_upper(self):
# NOT_RPYTHON
- return make_string(self._str.upper())
+ return self.make_string(self._str.upper())
def ll_lower(self):
# NOT_RPYTHON
- return make_string(self._str.lower())
+ return self.make_string(self._str.lower())
def ll_substring(self, start, count):
# NOT_RPYTHON
- return make_string(self._str[start:start+count])
+ return self.make_string(self._str[start:start+count])
def ll_split_chr(self, ch):
# NOT_RPYTHON
- res = _list(List(String))
- res._list = [make_string(s) for s in self._str.split(ch)]
+ res = _list(List(self._TYPE))
+ res._list = [self.make_string(s) for s in self._str.split(ch)]
return res
def ll_contains(self, ch):
@@ -1136,7 +1173,7 @@
def ll_replace_chr_chr(self, ch1, ch2):
# NOT_RPYTHON
- return make_string(self._str.replace(ch1, ch2))
+ return self.make_string(self._str.replace(ch1, ch2))
class _null_string(_null_mixin(_string), _string):
def __init__(self, STRING):
@@ -1154,7 +1191,7 @@
# do nothing
def ll_append_char(self, ch):
- assert isinstance(ch, str) and len(ch) == 1
+ assert isinstance(ch, basestring) and len(ch) == 1
self._buf.append(ch)
def ll_append(self, s):
@@ -1162,7 +1199,10 @@
self._buf.append(s._str)
def ll_build(self):
- return make_string(''.join(self._buf))
+ if self._TYPE is StringBuilder:
+ return make_string(''.join(self._buf))
+ else:
+ return make_unicode(u''.join(self._buf))
class _null_string_builder(_null_mixin(_string_builder), _string_builder):
def __init__(self, STRING_BUILDER):
@@ -1501,7 +1541,8 @@
return inst._identityhash()
def oohash(inst):
- assert typeOf(inst) is String # for now only strings are supported
+ assert typeOf(inst) is String or typeOf(inst) is Unicode
+ # for now only strings and unicode are supported
return hash(inst._str)
def oostring(obj, base):
@@ -1520,6 +1561,17 @@
obj = '<%s object>' % obj._inst._TYPE._name
return make_string(str(obj))
+def oounicode(obj, base):
+ """
+ Convert a unichar into a unicode string.
+
+ base must be -1, for consistency with oostring.
+ """
+ assert base == -1
+ assert isinstance(obj, unicode)
+ assert len(obj) == 1
+ return make_unicode(obj)
+
def ooparse_int(s, base):
return int(s._str, base)
@@ -1548,6 +1600,10 @@
ROOT = Instance('Root', None, _is_root=True)
String = String()
-StringBuilder = StringBuilder()
+Unicode = Unicode()
+UnicodeBuilder = StringBuilder(Unicode, UniChar)
+StringBuilder = StringBuilder(String, Char)
+String.builder = StringBuilder
+Unicode.builder = UnicodeBuilder
WeakReference = WeakReference()
dead_wref = new(WeakReference)
Modified: pypy/dist/pypy/rpython/ootypesystem/rstr.py
==============================================================================
--- pypy/dist/pypy/rpython/ootypesystem/rstr.py (original)
+++ pypy/dist/pypy/rpython/ootypesystem/rstr.py Fri Nov 9 13:53:02 2007
@@ -1,34 +1,19 @@
+from pypy.tool.pairtype import pairtype
from pypy.rpython.error import TyperError
from pypy.rpython.rstr import AbstractStringRepr,AbstractCharRepr,\
AbstractUniCharRepr, AbstractStringIteratorRepr,\
- AbstractLLHelpers
+ AbstractLLHelpers, AbstractUnicodeRepr
from pypy.rpython.rmodel import IntegerRepr
-from pypy.rpython.lltypesystem.lltype import Ptr, Char, UniChar
+from pypy.rpython.lltypesystem.lltype import Ptr, Char, UniChar, typeOf,\
+ cast_primitive
from pypy.rpython.ootypesystem import ootype
+from pypy.rpython.rmodel import Repr
# TODO: investigate if it's possible and it's worth to concatenate a
# String and a Char directly without passing to Char-->String
# conversion
-class StringRepr(AbstractStringRepr):
- """
- Some comments about the state of ootype strings at the end of Tokyo sprint
-
- What was accomplished:
- - The rstr module was split in an lltype and ootype version.
- - There is the beginnings of a String type in ootype.
- - The runtime representation of Strings is a subclass of the builtin str.
- The idea is that this saves us from boilerplate code implementing the
- builtin str methods.
-
- Nothing more was done because of lack of time and paralysis in the face
- of too many problems. Among other things, to write any meaningful tests
- we first need conversion from Chars to Strings (because
- test_llinterp.interpret won't accept strings as arguments). We will need a
- new low-level operation (convert_char_to_oostring or some such) for this.
- """
-
- lowleveltype = ootype.String
+class BaseOOStringRepr(Repr):
def __init__(self, *args):
AbstractStringRepr.__init__(self, *args)
@@ -36,13 +21,16 @@
def convert_const(self, value):
if value is None:
- return ootype.String._null
- if not isinstance(value, str):
+ return self.lowleveltype._null
+ if not isinstance(value, self.basetype):
raise TyperError("not a str: %r" % (value,))
- return ootype.make_string(value)
+ return self.make_string(value)
+
+ def make_string(self, value):
+ raise NotImplementedError
def make_iterator_repr(self):
- return string_iterator_repr
+ return self.string_iterator_repr
def _list_length_items(self, hop, v_lst, LIST):
# ootypesystem list has a different interface that
@@ -53,17 +41,64 @@
return c_length, v_lst
+class StringRepr(BaseOOStringRepr, AbstractStringRepr):
+ lowleveltype = ootype.String
+ basetype = str
+
+ def make_string(self, value):
+ return ootype.make_string(value)
+
+class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr):
+ lowleveltype = ootype.Unicode
+ basetype = basestring
+
+ def make_string(self, value):
+ return ootype.make_unicode(value)
+
+ def ll_str(self, value):
+ sb = ootype.new(ootype.StringBuilder)
+ lgt = value.ll_strlen()
+ sb.ll_allocate(lgt)
+ for i in range(lgt):
+ c = value.ll_stritem_nonneg(i)
+ if ord(c) > 127:
+ raise UnicodeEncodeError("%d > 127, not ascii" % ord(c))
+ sb.ll_append_char(cast_primitive(Char, c))
+ return sb.ll_build()
+
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
-class UniCharRepr(AbstractUniCharRepr):
+class UniCharRepr(AbstractUniCharRepr, UnicodeRepr):
lowleveltype = UniChar
+
+class __extend__(pairtype(UniCharRepr, UnicodeRepr)):
+ def convert_from_to((r_from, r_to), v, llops):
+ rstr = llops.rtyper.type_system.rstr
+ if r_from == unichar_repr and r_to == unicode_repr:
+ return llops.gendirectcall(r_from.ll.ll_unichr2unicode, v)
+ return NotImplemented
+
class LLHelpers(AbstractLLHelpers):
def ll_chr2str(ch):
return ootype.oostring(ch, -1)
+ def ll_str2unicode(s):
+ res = ootype.new(ootype.UnicodeBuilder)
+ lgt = s.ll_strlen()
+ res.ll_allocate(lgt)
+ for i in range(lgt):
+ c = s.ll_stritem_nonneg(i)
+ if ord(c) > 127:
+ raise UnicodeDecodeError
+ res.ll_append_char(cast_primitive(UniChar, c))
+ return res.ll_build()
+
+ def ll_unichr2unicode(ch):
+ return ootype.oounicode(ch, -1)
+
def ll_strhash(s):
return ootype.oohash(s)
@@ -73,7 +108,10 @@
def ll_char_mul(ch, times):
if times < 0:
times = 0
- buf = ootype.new(ootype.StringBuilder)
+ if typeOf(ch) == Char:
+ buf = ootype.new(ootype.StringBuilder)
+ else:
+ buf = ootype.new(ootype.UnicodeBuilder)
buf.ll_allocate(times)
i = 0
while i<times:
@@ -95,7 +133,7 @@
def ll_join(s, length_dummy, lst):
length = lst.ll_length()
- buf = ootype.new(ootype.StringBuilder)
+ buf = ootype.new(typeOf(s).builder)
# TODO: check if it's worth of preallocating the buffer with
# the exact length
@@ -119,7 +157,10 @@
return buf.ll_build()
def ll_join_chars(length_dummy, lst):
- buf = ootype.new(ootype.StringBuilder)
+ if typeOf(lst)._ITEMTYPE == Char:
+ buf = ootype.new(ootype.StringBuilder)
+ else:
+ buf = ootype.new(ootype.UnicodeBuilder)
length = lst.ll_length()
buf.ll_allocate(length)
i = 0
@@ -129,7 +170,10 @@
return buf.ll_build()
def ll_join_strs(length_dummy, lst):
- buf = ootype.new(ootype.StringBuilder)
+ if typeOf(lst)._ITEMTYPE == ootype.String:
+ buf = ootype.new(ootype.StringBuilder)
+ else:
+ buf = ootype.new(ootype.UnicodeBuilder)
length = lst.ll_length()
#buf.ll_allocate(length)
i = 0
@@ -294,13 +338,19 @@
del add_helpers
do_stringformat = LLHelpers.do_stringformat
-string_repr = StringRepr()
char_repr = CharRepr()
unichar_repr = UniCharRepr()
char_repr.ll = LLHelpers
unichar_repr.ll = LLHelpers
-emptystr = string_repr.convert_const("")
+
+string_repr = StringRepr()
StringRepr.repr = string_repr
+StringRepr.char_repr = char_repr
+emptystr = string_repr.convert_const("")
+unicode_repr = UnicodeRepr()
+UnicodeRepr.repr = unicode_repr
+UnicodeRepr.char_repr = unichar_repr
+
class StringIteratorRepr(AbstractStringIteratorRepr):
lowleveltype = ootype.Record({'string': string_repr.lowleveltype,
@@ -310,8 +360,22 @@
self.ll_striter = ll_striter
self.ll_strnext = ll_strnext
+class UnicodeIteratorRepr(AbstractStringIteratorRepr):
+ lowleveltype = ootype.Record({'string': unicode_repr.lowleveltype,
+ 'index': ootype.Signed})
+
+ def __init__(self):
+ self.ll_striter = ll_unicodeiter
+ self.ll_strnext = ll_strnext
+
def ll_striter(string):
- iter = ootype.new(string_iterator_repr.lowleveltype)
+ iter = ootype.new(string_repr.string_iterator_repr.lowleveltype)
+ iter.string = string
+ iter.index = 0
+ return iter
+
+def ll_unicodeiter(string):
+ iter = ootype.new(unicode_repr.string_iterator_repr.lowleveltype)
iter.string = string
iter.index = 0
return iter
@@ -324,8 +388,9 @@
iter.index = index + 1
return string.ll_stritem_nonneg(index)
-string_iterator_repr = StringIteratorRepr()
+StringRepr.string_iterator_repr = StringIteratorRepr()
+UnicodeRepr.string_iterator_repr = UnicodeIteratorRepr()
# these should be in rclass, but circular imports prevent (also it's
# not that insane that a string constant is built in this file).
Modified: pypy/dist/pypy/rpython/ootypesystem/test/test_ooann.py
==============================================================================
--- pypy/dist/pypy/rpython/ootypesystem/test/test_ooann.py (original)
+++ pypy/dist/pypy/rpython/ootypesystem/test/test_ooann.py Fri Nov 9 13:53:02 2007
@@ -324,3 +324,17 @@
return c.foo(c)
a = RPythonAnnotator()
py.test.raises(TypeError, a.build_types, f, [])
+
+def test_unicode_iterator():
+ from pypy.rpython.ootypesystem import rstr
+ ITER = rstr.UnicodeRepr.string_iterator_repr.lowleveltype
+
+ def fn():
+ it = new(ITER)
+ return it.string
+ a = RPythonAnnotator()
+ res = a.build_types(fn, [])
+
+ assert ITER._field_type("string") is Unicode
+ assert isinstance(res, annmodel.SomeOOInstance)
+ assert res.ootype is Unicode
Modified: pypy/dist/pypy/rpython/rbuiltin.py
==============================================================================
--- pypy/dist/pypy/rpython/rbuiltin.py (original)
+++ pypy/dist/pypy/rpython/rbuiltin.py Fri Nov 9 13:53:02 2007
@@ -218,6 +218,9 @@
assert hop.nb_args == 1
return hop.args_r[0].rtype_unichr(hop)
+def rtype_builtin_unicode(hop):
+ return hop.args_r[0].rtype_unicode(hop)
+
def rtype_builtin_list(hop):
return hop.args_r[0].rtype_bltn_list(hop)
Modified: pypy/dist/pypy/rpython/rstr.py
==============================================================================
--- pypy/dist/pypy/rpython/rstr.py (original)
+++ pypy/dist/pypy/rpython/rstr.py Fri Nov 9 13:53:02 2007
@@ -16,9 +16,16 @@
class AbstractCharRepr(AbstractStringRepr):
pass
-class AbstractUniCharRepr(Repr):
+class AbstractUniCharRepr(AbstractStringRepr):
pass
+class AbstractUnicodeRepr(AbstractStringRepr):
+ def rtype_method_upper(self, hop):
+ raise TypeError("Cannot do toupper on unicode string")
+
+ def rtype_method_lower(self, hop):
+ raise TypeError("Cannot do tolower on unicode string")
+
class __extend__(annmodel.SomeString):
def rtyper_makerepr(self, rtyper):
return rtyper.type_system.rstr.string_repr
@@ -47,6 +54,9 @@
class __extend__(AbstractStringRepr):
+ def _str_reprs(self, hop):
+ return hop.args_r[0].repr, hop.args_r[1].repr
+
def get_ll_eq_function(self):
return self.ll.ll_streq
@@ -64,7 +74,7 @@
def rtype_is_true(self, hop):
s_str = hop.args_s[0]
if s_str.can_be_None:
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ string_repr = hop.args_r[0].repr
v_str, = hop.inputargs(string_repr)
return hop.gendirectcall(self.ll.ll_str_is_true, v_str)
else:
@@ -72,32 +82,38 @@
return super(AbstractStringRepr, self).rtype_is_true(hop)
def rtype_ord(self, hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ string_repr = hop.args_r[0].repr
v_str, = hop.inputargs(string_repr)
c_zero = inputconst(Signed, 0)
v_chr = hop.gendirectcall(self.ll.ll_stritem_nonneg, v_str, c_zero)
- return hop.genop('cast_char_to_int', [v_chr], resulttype=Signed)
+ if string_repr is hop.rtyper.type_system.rstr.string_repr:
+ return hop.genop('cast_char_to_int', [v_chr], resulttype=Signed)
+ else:
+ assert string_repr is hop.rtyper.type_system.rstr.unicode_repr
+ return hop.genop('cast_unichar_to_int', [v_chr], resulttype=Signed)
def rtype_method_startswith(self, hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str, v_value = hop.inputargs(string_repr, string_repr)
+ str1_repr, str2_repr = self._str_reprs(hop)
+ v_str, v_value = hop.inputargs(str1_repr, str2_repr)
hop.exception_cannot_occur()
return hop.gendirectcall(self.ll.ll_startswith, v_str, v_value)
def rtype_method_endswith(self, hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str, v_value = hop.inputargs(string_repr, string_repr)
+ str1_repr, str2_repr = self._str_reprs(hop)
+ v_str, v_value = hop.inputargs(str1_repr, str2_repr)
hop.exception_cannot_occur()
return hop.gendirectcall(self.ll.ll_endswith, v_str, v_value)
def rtype_method_find(self, hop, reverse=False):
- rstr = hop.rtyper.type_system.rstr
- v_str = hop.inputarg(rstr.string_repr, arg=0)
- if hop.args_r[1] == rstr.char_repr:
- v_value = hop.inputarg(rstr.char_repr, arg=1)
+ # XXX binaryop
+ string_repr = hop.args_r[0].repr
+ char_repr = hop.args_r[0].char_repr
+ v_str = hop.inputarg(string_repr, arg=0)
+ if hop.args_r[1] == char_repr:
+ v_value = hop.inputarg(char_repr, arg=1)
llfn = reverse and self.ll.ll_rfind_char or self.ll.ll_find_char
else:
- v_value = hop.inputarg(rstr.string_repr, arg=1)
+ v_value = hop.inputarg(string_repr, arg=1)
llfn = reverse and self.ll.ll_rfind or self.ll.ll_find
if hop.nb_args > 2:
v_start = hop.inputarg(Signed, arg=2)
@@ -118,13 +134,13 @@
return self.rtype_method_find(hop, reverse=True)
def rtype_method_count(self, hop):
- rstr = hop.rtyper.type_system.rstr
- v_str = hop.inputarg(rstr.string_repr, arg=0)
+ rstr = hop.args_r[0].repr
+ v_str = hop.inputarg(rstr.repr, arg=0)
if hop.args_r[1] == rstr.char_repr:
v_value = hop.inputarg(rstr.char_repr, arg=1)
llfn = self.ll.ll_count_char
else:
- v_value = hop.inputarg(rstr.string_repr, arg=1)
+ v_value = hop.inputarg(rstr.repr, arg=1)
llfn = self.ll.ll_count
if hop.nb_args > 2:
v_start = hop.inputarg(Signed, arg=2)
@@ -142,8 +158,8 @@
return hop.gendirectcall(llfn, v_str, v_value, v_start, v_end)
def rtype_method_strip(self, hop, left=True, right=True):
- rstr = hop.rtyper.type_system.rstr
- v_str = hop.inputarg(rstr.string_repr, arg=0)
+ rstr = hop.args_r[0].repr
+ v_str = hop.inputarg(rstr.repr, arg=0)
v_char = hop.inputarg(rstr.char_repr, arg=1)
v_left = hop.inputconst(Bool, left)
v_right = hop.inputconst(Bool, right)
@@ -156,13 +172,13 @@
return self.rtype_method_strip(hop, left=False, right=True)
def rtype_method_upper(self, hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ string_repr = hop.args_r[0].repr
v_str, = hop.inputargs(string_repr)
hop.exception_cannot_occur()
return hop.gendirectcall(self.ll.ll_upper, v_str)
def rtype_method_lower(self, hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ string_repr = hop.args_r[0].repr
v_str, = hop.inputargs(string_repr)
hop.exception_cannot_occur()
return hop.gendirectcall(self.ll.ll_lower, v_str)
@@ -174,17 +190,17 @@
def rtype_method_join(self, hop):
hop.exception_cannot_occur()
- rstr = hop.rtyper.type_system.rstr
+ rstr = hop.args_r[0]
if hop.s_result.is_constant():
- return inputconst(rstr.string_repr, hop.s_result.const)
+ return inputconst(rstr.repr, hop.s_result.const)
r_lst = hop.args_r[1]
if not isinstance(r_lst, hop.rtyper.type_system.rlist.BaseListRepr):
raise TyperError("string.join of non-list: %r" % r_lst)
- v_str, v_lst = hop.inputargs(rstr.string_repr, r_lst)
+ v_str, v_lst = hop.inputargs(rstr.repr, r_lst)
v_length, v_items = self._list_length_items(hop, v_lst, r_lst.lowleveltype)
if hop.args_s[0].is_constant() and hop.args_s[0].const == '':
- if r_lst.item_repr == rstr.string_repr:
+ if r_lst.item_repr == rstr.repr:
llfn = self.ll.ll_join_strs
elif r_lst.item_repr == rstr.char_repr:
llfn = self.ll.ll_join_chars
@@ -192,15 +208,15 @@
raise TyperError("''.join() of non-string list: %r" % r_lst)
return hop.gendirectcall(llfn, v_length, v_items)
else:
- if r_lst.item_repr == rstr.string_repr:
+ if r_lst.item_repr == rstr.repr:
llfn = self.ll.ll_join
else:
raise TyperError("sep.join() of non-string list: %r" % r_lst)
return hop.gendirectcall(llfn, v_str, v_length, v_items)
def rtype_method_split(self, hop):
- rstr = hop.rtyper.type_system.rstr
- v_str, v_chr = hop.inputargs(rstr.string_repr, rstr.char_repr)
+ rstr = hop.args_r[0].repr
+ v_str, v_chr = hop.inputargs(rstr.repr, rstr.char_repr)
try:
list_type = hop.r_result.lowleveltype.TO
except AttributeError:
@@ -210,16 +226,16 @@
return hop.gendirectcall(self.ll.ll_split_chr, cLIST, v_str, v_chr)
def rtype_method_replace(self, hop):
- rstr = hop.rtyper.type_system.rstr
+ rstr = hop.args_r[0].repr
if not (hop.args_r[1] == rstr.char_repr and hop.args_r[2] == rstr.char_repr):
raise TyperError, 'replace only works for char args'
- v_str, v_c1, v_c2 = hop.inputargs(rstr.string_repr, rstr.char_repr, rstr.char_repr)
+ v_str, v_c1, v_c2 = hop.inputargs(rstr.repr, rstr.char_repr, rstr.char_repr)
hop.exception_cannot_occur()
return hop.gendirectcall(self.ll.ll_replace_chr_chr, v_str, v_c1, v_c2)
def rtype_int(self, hop):
hop.has_implicit_exception(ValueError) # record that we know about it
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ string_repr = hop.args_r[0].repr
if hop.nb_args == 1:
v_str, = hop.inputargs(string_repr)
c_base = inputconst(Signed, 10)
@@ -231,9 +247,22 @@
hop.exception_is_here()
return hop.gendirectcall(self.ll.ll_int, v_str, v_base)
+ def rtype_unicode(self, hop):
+ if hop.args_s[0].is_constant():
+ return hop.inputconst(hop.r_result, hop.s_result.const)
+ repr = hop.args_r[0].repr
+ v_str = hop.inputarg(repr, 0)
+ hop.exception_is_here()
+ return hop.gendirectcall(self.ll.ll_str2unicode, v_str)
+
+ def rtype_method_decode(self, hop):
+ v_self = hop.inputarg(self, 0)
+ hop.exception_is_here()
+ return hop.gendirectcall(self.ll.ll_str2unicode, v_self)
+
def rtype_float(self, hop):
hop.has_implicit_exception(ValueError) # record that we know about it
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ string_repr = hop.args_r[0].repr
v_str, = hop.inputargs(string_repr)
hop.exception_is_here()
return hop.gendirectcall(self.ll.ll_float, v_str)
@@ -241,6 +270,11 @@
def ll_str(self, s):
return s
+class __extend__(AbstractUnicodeRepr):
+ def rtype_method_encode(self, hop):
+ v_self = hop.inputarg(self, 0)
+ hop.exception_is_here()
+ return hop.gendirectcall(self.ll_str, v_self)
class __extend__(pairtype(AbstractStringRepr, Repr)):
def rtype_mod((r_str, _), hop):
@@ -280,73 +314,69 @@
class __extend__(pairtype(AbstractStringRepr, AbstractSliceRepr)):
def rtype_getitem((r_str, r_slic), hop):
- rstr = hop.rtyper.type_system.rstr
+ string_repr = r_str.repr
rslice = hop.rtyper.type_system.rslice
if r_slic == rslice.startonly_slice_repr:
- v_str, v_start = hop.inputargs(rstr.string_repr, rslice.startonly_slice_repr)
+ v_str, v_start = hop.inputargs(string_repr, rslice.startonly_slice_repr)
return hop.gendirectcall(r_str.ll.ll_stringslice_startonly, v_str, v_start)
if r_slic == rslice.startstop_slice_repr:
- v_str, v_slice = hop.inputargs(rstr.string_repr, rslice.startstop_slice_repr)
+ v_str, v_slice = hop.inputargs(string_repr, rslice.startstop_slice_repr)
return hop.gendirectcall(r_str.ll.ll_stringslice, v_str, v_slice)
if r_slic == rslice.minusone_slice_repr:
- v_str, v_ignored = hop.inputargs(rstr.string_repr, rslice.minusone_slice_repr)
+ v_str, v_ignored = hop.inputargs(string_repr, rslice.minusone_slice_repr)
return hop.gendirectcall(r_str.ll.ll_stringslice_minusone, v_str)
raise TyperError(r_slic)
-
class __extend__(pairtype(AbstractStringRepr, AbstractStringRepr)):
def rtype_add((r_str1, r_str2), hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ str1_repr = r_str1.repr
+ str2_repr = r_str2.repr
if hop.s_result.is_constant():
- return hop.inputconst(string_repr, hop.s_result.const)
- v_str1, v_str2 = hop.inputargs(string_repr, string_repr)
+ return hop.inputconst(str1_repr, hop.s_result.const)
+ v_str1, v_str2 = hop.inputargs(str1_repr, str2_repr)
return hop.gendirectcall(r_str1.ll.ll_strconcat, v_str1, v_str2)
rtype_inplace_add = rtype_add
def rtype_eq((r_str1, r_str2), hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str1, v_str2 = hop.inputargs(string_repr, string_repr)
+ v_str1, v_str2 = hop.inputargs(r_str1.repr, r_str2.repr)
return hop.gendirectcall(r_str1.ll.ll_streq, v_str1, v_str2)
def rtype_ne((r_str1, r_str2), hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str1, v_str2 = hop.inputargs(string_repr, string_repr)
+ v_str1, v_str2 = hop.inputargs(r_str1.repr, r_str2.repr)
vres = hop.gendirectcall(r_str1.ll.ll_streq, v_str1, v_str2)
return hop.genop('bool_not', [vres], resulttype=Bool)
def rtype_lt((r_str1, r_str2), hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str1, v_str2 = hop.inputargs(string_repr, string_repr)
+ v_str1, v_str2 = hop.inputargs(r_str1.repr, r_str2.repr)
vres = hop.gendirectcall(r_str1.ll.ll_strcmp, v_str1, v_str2)
return hop.genop('int_lt', [vres, hop.inputconst(Signed, 0)],
resulttype=Bool)
def rtype_le((r_str1, r_str2), hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str1, v_str2 = hop.inputargs(string_repr, string_repr)
+ v_str1, v_str2 = hop.inputargs(r_str1.repr, r_str2.repr)
vres = hop.gendirectcall(r_str1.ll.ll_strcmp, v_str1, v_str2)
return hop.genop('int_le', [vres, hop.inputconst(Signed, 0)],
resulttype=Bool)
def rtype_ge((r_str1, r_str2), hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str1, v_str2 = hop.inputargs(string_repr, string_repr)
+ v_str1, v_str2 = hop.inputargs(r_str1.repr, r_str2.repr)
vres = hop.gendirectcall(r_str1.ll.ll_strcmp, v_str1, v_str2)
return hop.genop('int_ge', [vres, hop.inputconst(Signed, 0)],
resulttype=Bool)
def rtype_gt((r_str1, r_str2), hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
- v_str1, v_str2 = hop.inputargs(string_repr, string_repr)
+ v_str1, v_str2 = hop.inputargs(r_str1.repr, r_str2.repr)
vres = hop.gendirectcall(r_str1.ll.ll_strcmp, v_str1, v_str2)
return hop.genop('int_gt', [vres, hop.inputconst(Signed, 0)],
resulttype=Bool)
-class __extend__(pairtype(AbstractStringRepr, AbstractCharRepr)):
+class __extend__(pairtype(AbstractStringRepr, AbstractCharRepr),
+ pairtype(AbstractUnicodeRepr, AbstractUniCharRepr)):
def rtype_contains((r_str, r_chr), hop):
- rstr = hop.rtyper.type_system.rstr
- v_str, v_chr = hop.inputargs(rstr.string_repr, rstr.char_repr)
+ string_repr = r_str.repr
+ char_repr = r_chr.char_repr
+ v_str, v_chr = hop.inputargs(string_repr, char_repr)
return hop.gendirectcall(r_str.ll.ll_contains, v_str, v_chr)
class __extend__(pairtype(AbstractStringRepr, AbstractTupleRepr)):
@@ -362,7 +392,8 @@
return r_str.ll.do_stringformat(hop, sourcevars)
-class __extend__(AbstractCharRepr):
+class __extend__(AbstractCharRepr,
+ AbstractUniCharRepr):
def convert_const(self, value):
if not isinstance(value, str) or len(value) != 1:
@@ -388,13 +419,13 @@
return hop.inputconst(Bool, True)
def rtype_ord(_, hop):
- rstr = hop.rtyper.type_system.rstr
- vlist = hop.inputargs(rstr.char_repr)
+ repr = hop.args_r[0].char_repr
+ vlist = hop.inputargs(repr)
return hop.genop('cast_char_to_int', vlist, resulttype=Signed)
def _rtype_method_isxxx(_, llfn, hop):
- rstr = hop.rtyper.type_system.rstr
- vlist = hop.inputargs(rstr.char_repr)
+ repr = hop.args_r[0].char_repr
+ vlist = hop.inputargs(repr)
hop.exception_cannot_occur()
return hop.gendirectcall(llfn, vlist[0])
@@ -411,18 +442,20 @@
def rtype_method_islower(self, hop):
return self._rtype_method_isxxx(self.ll.ll_char_islower, hop)
-class __extend__(pairtype(AbstractCharRepr, IntegerRepr)):
+class __extend__(pairtype(AbstractCharRepr, IntegerRepr),
+ pairtype(AbstractUniCharRepr, IntegerRepr)):
def rtype_mul((r_chr, r_int), hop):
- rstr = hop.rtyper.type_system.rstr
- v_char, v_int = hop.inputargs(rstr.char_repr, Signed)
+ char_repr = r_chr.char_repr
+ v_char, v_int = hop.inputargs(char_repr, Signed)
return hop.gendirectcall(r_chr.ll.ll_char_mul, v_char, v_int)
rtype_inplace_mul = rtype_mul
-class __extend__(pairtype(IntegerRepr, AbstractCharRepr)):
+class __extend__(pairtype(IntegerRepr, AbstractCharRepr),
+ pairtype(IntegerRepr, AbstractUniCharRepr)):
def rtype_mul((r_int, r_chr), hop):
- rstr = hop.rtyper.type_system.rstr
- v_int, v_char = hop.inputargs(Signed, rstr.char_repr)
+ char_repr = r_chr.char_repr
+ v_int, v_char = hop.inputargs(Signed, char_repr)
return hop.gendirectcall(r_chr.ll.ll_char_mul, v_char, v_int)
rtype_inplace_mul = rtype_mul
@@ -492,10 +525,12 @@
#
# _________________________ Conversions _________________________
-class __extend__(pairtype(AbstractCharRepr, AbstractStringRepr)):
+class __extend__(pairtype(AbstractCharRepr, AbstractStringRepr),
+ pairtype(AbstractUniCharRepr, AbstractUnicodeRepr)):
def convert_from_to((r_from, r_to), v, llops):
rstr = llops.rtyper.type_system.rstr
- if r_from == rstr.char_repr and r_to == rstr.string_repr:
+ if (r_from == rstr.char_repr and r_to == rstr.string_repr) or\
+ (r_from == rstr.unichar_repr and r_to == rstr.unicode_repr):
return llops.gendirectcall(r_from.ll.ll_chr2str, v)
return NotImplemented
@@ -519,7 +554,7 @@
class AbstractStringIteratorRepr(IteratorRepr):
def newiter(self, hop):
- string_repr = hop.rtyper.type_system.rstr.string_repr
+ string_repr = hop.args_r[0].repr
v_str, = hop.inputargs(string_repr)
return hop.gendirectcall(self.ll_striter, v_str)
@@ -572,7 +607,7 @@
c = ord(ch)
return 65 <= c <= 90
- def ll_char_islower(ch):
+ def ll_char_islower(ch):
c = ord(ch)
return 97 <= c <= 122
Modified: pypy/dist/pypy/rpython/test/test_rstr.py
==============================================================================
--- pypy/dist/pypy/rpython/test/test_rstr.py (original)
+++ pypy/dist/pypy/rpython/test/test_rstr.py Fri Nov 9 13:53:02 2007
@@ -14,26 +14,31 @@
assert parse('%s') == [('s',)]
assert parse("name '%s' is not defined") == ["name '", ("s",), "' is not defined"]
-class BaseTestRstr(BaseRtypingTest):
-
+class AbstractTestRstr(BaseRtypingTest):
def test_simple(self):
+ const = self.const
def fn(i):
- s = 'hello'
+ s = const('hello')
return s[i]
for i in range(5):
res = self.interpret(fn, [i])
- assert res == 'hello'[i]
+ expected = fn(i)
+ assert res == expected
+ assert res.__class__ is expected.__class__
def test_implicit_index_error(self):
+ const = self.const
def fn(i):
- s = 'hello'
+ s = const('hello')
try:
return s[i]
except IndexError:
- return '*'
+ return const('*')
for i in range(-5, 5):
res = self.interpret(fn, [i])
- assert res == 'hello'[i]
+ expected = fn(i)
+ assert res == expected
+ assert res.__class__ is expected.__class__
res = self.interpret(fn, [5])
assert res == '*'
res = self.interpret(fn, [6])
@@ -42,8 +47,9 @@
assert res == '*'
def test_nonzero(self):
+ const = self.const
def fn(i, j):
- s = ['', 'xx'][j]
+ s = [const(''), const('xx')][j]
if i < 0:
s = None
if i > -2:
@@ -56,9 +62,10 @@
assert res is fn(i, j)
def test_concat(self):
+ const = self.const
def fn(i, j):
- s1 = ['', 'a', 'ab']
- s2 = ['', 'x', 'xy']
+ s1 = [const(''), const('a'), const('ab')]
+ s2 = [const(''), const('x'), const('xy')]
return s1[i] + s2[j]
for i in range(3):
for j in range(3):
@@ -66,8 +73,9 @@
assert self.ll_to_string(res) == fn(i, j)
def test_iter(self):
+ const = self.const
def fn(i):
- s = ['', 'a', 'hello'][i]
+ s = [const(''), const('a'), const('hello')][i]
i = 0
for c in s:
if c != s[i]:
@@ -82,15 +90,17 @@
assert res is True
def test_char_constant(self):
+ const = self.const
def fn(s):
- return s + '.'
- res = self.interpret(fn, ['x'])
+ return s + const('.')
+ res = self.interpret(fn, [const('x')])
res = self.ll_to_string(res)
assert len(res) == 2
- assert res[0] == 'x'
- assert res[1] == '.'
+ assert res[0] == const('x')
+ assert res[1] == const('.')
def test_char_isxxx(self):
+ constchar = self.constchar
def fn(s):
return (s.isspace() |
s.isdigit() << 1 |
@@ -99,93 +109,52 @@
s.isupper() << 4 |
s.islower() << 5)
for i in range(128):
- ch = chr(i)
+ ch = constchar(i)
res = self.interpret(fn, [ch])
assert res == fn(ch)
def test_char_compare(self):
- res = self.interpret(lambda c1, c2: c1 == c2, ['a', 'b'])
+ const = self.const
+ res = self.interpret(lambda c1, c2: c1 == c2, [const('a'),
+ const('b')])
assert res is False
- res = self.interpret(lambda c1, c2: c1 == c2, ['a', 'a'])
+ res = self.interpret(lambda c1, c2: c1 == c2, [const('a'),
+ const('a')])
assert res is True
- res = self.interpret(lambda c1, c2: c1 <= c2, ['z', 'a'])
+ res = self.interpret(lambda c1, c2: c1 <= c2, [const('z'),
+ const('a')])
assert res is False
def test_char_mul(self):
+ const = self.const
def fn(c, mul):
s = c * mul
res = 0
for i in range(len(s)):
- res = res*10 + ord(s[i]) - ord('0')
+ res = res*10 + ord(const(s[i])) - ord(const('0'))
c2 = c
c2 *= mul
res = 10 * res + (c2 == s)
return res
- res = self.interpret(fn, ['3', 5])
+ res = self.interpret(fn, [const('3'), 5])
assert res == 333331
- res = self.interpret(fn, ['5', 3])
+ res = self.interpret(fn, [const('5'), 3])
assert res == 5551
- def test_unichar_const(self):
- def fn(c):
- return c
- assert self.interpret(fn, [u'\u03b1']) == u'\u03b1'
-
- def test_unichar_eq(self):
- def fn(c1, c2):
- return c1 == c2
- assert self.interpret(fn, [u'\u03b1', u'\u03b1']) == True
- assert self.interpret(fn, [u'\u03b1', u'\u03b2']) == False
-
- def test_unichar_ord(self):
- def fn(c):
- return ord(c)
- assert self.interpret(fn, [u'\u03b1']) == ord(u'\u03b1')
-
- def test_unichar_hash(self):
- def fn(c):
- d = {c: 42}
- return d[c]
- assert self.interpret(fn, [u'\u03b1']) == 42
-
- def test_convert_char_to_unichar(self):
- def g(c):
- return ord(c)
- def fn(n):
- if n < 0:
- c = unichr(-n)
- else:
- c = chr(n)
- return g(c)
- assert self.interpret(fn, [65]) == 65
- assert self.interpret(fn, [-5555]) == 5555
-
- def test_char_unichar_eq(self):
- def fn(c1, c2):
- return c1 == c2
- assert self.interpret(fn, [u'(', '(']) == True
- assert self.interpret(fn, [u'\u1028', '(']) == False
- assert self.interpret(fn, ['(', u'(']) == True
- assert self.interpret(fn, ['(', u'\u1028']) == False
-
- def test_char_unichar_eq_2(self):
- def fn(c1):
- return c1 == 'X'
- assert self.interpret(fn, [u'(']) == False
- assert self.interpret(fn, [u'\u1058']) == False
- assert self.interpret(fn, [u'X']) == True
-
def test_is_none(self):
+ const = self.const
def fn(i):
- s1 = ['foo', None][i]
+ s1 = [const('foo'), None][i]
return s1 is None
assert self.interpret(fn, [0]) == False
assert self.interpret(fn, [1]) == True
def test_str_compare(self):
+ const = self.const
def fn(i, j):
- s1 = ['one', 'two', None]
- s2 = ['one', 'two', 'o', 'on', 'twos', 'foobar', None]
+ s1 = [const('one'), const('two'), None]
+ s2 = [const('one'), const('two'), const('o'),
+ const('on'), const('twos'), const('foobar'), None]
return s1[i] == s2[j]
for i in range(3):
for j in range(7):
@@ -193,8 +162,8 @@
assert res is fn(i, j)
def fn(i, j):
- s1 = ['one', 'two']
- s2 = ['one', 'two', 'o', 'on', 'twos', 'foobar']
+ s1 = [const('one'), const('two')]
+ s2 = [const('one'), const('two'), const('o'), const('on'), const('twos'), const('foobar')]
return s1[i] != s2[j]
for i in range(2):
for j in range(6):
@@ -202,8 +171,8 @@
assert res is fn(i, j)
def fn(i, j):
- s1 = ['one', 'two']
- s2 = ['one', 'two', 'o', 'on', 'twos', 'foobar']
+ s1 = [const('one'), const('two')]
+ s2 = [const('one'), const('two'), const('o'), const('on'), const('twos'), const('foobar')]
return s1[i] < s2[j]
for i in range(2):
for j in range(6):
@@ -211,8 +180,8 @@
assert res is fn(i, j)
def fn(i, j):
- s1 = ['one', 'two']
- s2 = ['one', 'two', 'o', 'on', 'twos', 'foobar']
+ s1 = [const('one'), const('two')]
+ s2 = [const('one'), const('two'), const('o'), const('on'), const('twos'), const('foobar')]
return s1[i] <= s2[j]
for i in range(2):
for j in range(6):
@@ -220,8 +189,8 @@
assert res is fn(i, j)
def fn(i, j):
- s1 = ['one', 'two']
- s2 = ['one', 'two', 'o', 'on', 'twos', 'foobar']
+ s1 = [const('one'), const('two')]
+ s2 = [const('one'), const('two'), const('o'), const('on'), const('twos'), const('foobar')]
return s1[i] >= s2[j]
for i in range(2):
for j in range(6):
@@ -229,8 +198,8 @@
assert res is fn(i, j)
def fn(i, j):
- s1 = ['one', 'two']
- s2 = ['one', 'two', 'o', 'on', 'twos', 'foobar']
+ s1 = [const('one'), const('two')]
+ s2 = [const('one'), const('two'), const('o'), const('on'), const('twos'), const('foobar')]
return s1[i] > s2[j]
for i in range(2):
for j in range(6):
@@ -238,9 +207,10 @@
assert res is fn(i, j)
def test_startswith(self):
+ const = self.const
def fn(i, j):
- s1 = ['', 'one', 'two']
- s2 = ['', 'one', 'two', 'o', 'on', 'ne', 'e', 'twos', 'foobar', 'fortytwo']
+ s1 = [const(''), const('one'), const('two')]
+ s2 = [const(''), const('one'), const('two'), const('o'), const('on'), const('ne'), const('e'), const('twos'), const('foobar'), const('fortytwo')]
return s1[i].startswith(s2[j])
for i in range(3):
for j in range(10):
@@ -248,9 +218,10 @@
assert res is fn(i, j)
def test_endswith(self):
+ const = self.const
def fn(i, j):
- s1 = ['', 'one', 'two']
- s2 = ['', 'one', 'two', 'o', 'on', 'ne', 'e', 'twos', 'foobar', 'fortytwo']
+ s1 = [const(''), const('one'), const('two')]
+ s2 = [const(''), const('one'), const('two'), const('o'), const('on'), const('ne'), const('e'), const('twos'), const('foobar'), const('fortytwo')]
return s1[i].endswith(s2[j])
for i in range(3):
for j in range(10):
@@ -258,9 +229,10 @@
assert res is fn(i, j)
def test_find(self):
+ const = self.const
def fn(i, j):
- s1 = ['one two three', 'abc abcdab abcdabcdabde']
- s2 = ['one', 'two', 'abcdab', 'one tou', 'abcdefgh', 'fortytwo', '']
+ s1 = [const('one two three'), const('abc abcdab abcdabcdabde')]
+ s2 = [const('one'), const('two'), const('abcdab'), const('one tou'), const('abcdefgh'), const('fortytwo'), const('')]
return s1[i].find(s2[j])
for i in range(2):
for j in range(7):
@@ -268,80 +240,89 @@
assert res == fn(i, j)
def test_find_with_start(self):
+ const = self.const
def fn(i):
assert i >= 0
- return 'ababcabc'.find('abc', i)
+ return const('ababcabc').find(const('abc'), i)
for i in range(9):
res = self.interpret(fn, [i])
assert res == fn(i)
def test_find_with_start_end(self):
+ const = self.const
def fn(i, j):
assert i >= 0
assert j >= 0
- return 'ababcabc'.find('abc', i, j)
+ return const('ababcabc').find(const('abc'), i, j)
for (i, j) in [(1,7), (2,6), (3,7), (3,8)]:
res = self.interpret(fn, [i, j])
assert res == fn(i, j)
def test_find_empty_string(self):
+ const = self.const
def f(i):
assert i >= 0
- s = "abc"
- x = s.find('')
- x+= s.find('', i)*10
- x+= s.find('', i, i)*100
- x+= s.find('', i, i+1)*1000
+ s = const("abc")
+ x = s.find(const(''))
+ x+= s.find(const(''), i)*10
+ x+= s.find(const(''), i, i)*100
+ x+= s.find(const(''), i, i+1)*1000
return x
for i in range(5):
res = self.interpret(f, [i])
assert res == f(i)
def test_rfind(self):
+ const = self.const
def fn():
- return 'aaa'.rfind('a') + 'aaa'.rfind('a', 1) + 'aaa'.rfind('a', 1, 2)
+ return const('aaa').rfind(const('a')) + const('aaa').rfind(const('a'), 1) + const('aaa').rfind(const('a'), 1, 2)
res = self.interpret(fn, [])
assert res == 2 + 2 + 1
def test_rfind_empty_string(self):
+ const = self.const
def f(i):
assert i >= 0
- s = "abc"
- x = s.find('')
- x+= s.find('', i)*10
- x+= s.find('', i, i)*100
- x+= s.find('', i, i+1)*1000
+ s = const("abc")
+ x = s.find(const(''))
+ x+= s.find(const(''), i)*10
+ x+= s.find(const(''), i, i)*100
+ x+= s.find(const(''), i, i+1)*1000
return x
for i in range(5):
res = self.interpret(f, [i])
assert res == f(i)
def test_find_char(self):
+ const = self.const
def fn(ch):
- pos1 = 'aiuwraz 483'.find(ch)
- pos2 = 'aiuwraz 483'.rfind(ch)
+ pos1 = const('aiuwraz 483').find(ch)
+ pos2 = const('aiuwraz 483').rfind(ch)
return pos1 + (pos2*100)
- for ch in 'a ?3':
+ for ch in const('a ?3'):
res = self.interpret(fn, [ch])
assert res == fn(ch)
def test_strip(self):
+ const = self.const
def both():
- return '!ab!'.strip('!')
+ return const('!ab!').strip(const('!'))
def left():
- return '!ab!'.lstrip('!')
+ return const('!ab!').lstrip(const('!'))
def right():
- return '!ab!'.rstrip('!')
+ return const('!ab!').rstrip(const('!'))
res = self.interpret(both, [])
- assert self.ll_to_string(res) == 'ab'
+ assert self.ll_to_string(res) == const('ab')
res = self.interpret(left, [])
- assert self.ll_to_string(res) == 'ab!'
+ assert self.ll_to_string(res) == const('ab!')
res = self.interpret(right, [])
- assert self.ll_to_string(res) == '!ab'
+ assert self.ll_to_string(res) == const('!ab')
def test_upper(self):
- strings = ['', ' ', 'upper', 'UpPeR', ',uppEr,']
- for i in range(256): strings.append(chr(i))
+ const = self.const
+ constchar = self.constchar
+ strings = [const(''), const(' '), const('upper'), const('UpPeR'), const(',uppEr,')]
+ for i in range(256): strings.append(constchar(i))
def fn(i):
return strings[i].upper()
for i in range(len(strings)):
@@ -349,7 +330,8 @@
assert self.ll_to_string(res) == fn(i)
def test_lower(self):
- strings = ['', ' ', 'lower', 'LoWeR', ',lowEr,']
+ const = self.const
+ strings = [const(''), const(' '), const('lower'), const('LoWeR'), const(',lowEr,')]
for i in range(256): strings.append(chr(i))
def fn(i):
return strings[i].lower()
@@ -358,21 +340,22 @@
assert self.ll_to_string(res) == fn(i)
def test_join(self):
- res = self.interpret(lambda: ''.join([]), [])
+ const = self.const
+ res = self.interpret(lambda: const('').join([]), [])
assert self.ll_to_string(res) == ""
- res = self.interpret(lambda: ''.join(['a', 'b', 'c']), [])
+ res = self.interpret(lambda: const('').join([const('a'), const('b'), const('c')]), [])
assert self.ll_to_string(res) == "abc"
- res = self.interpret(lambda: ''.join(['abc', 'de', 'fghi']), [])
+ res = self.interpret(lambda: const('').join([const('abc'), const('de'), const('fghi')]), [])
assert self.ll_to_string(res) == "abcdefghi"
- res = self.interpret(lambda: '.'.join(['abc', 'def']), [])
- assert self.ll_to_string(res) == 'abc.def'
+ res = self.interpret(lambda: const('.').join([const('abc'), const('def')]), [])
+ assert self.ll_to_string(res) == const('abc.def')
def fn(i, j):
- s1 = [ '', ',', ' and ']
- s2 = [ [], ['foo'], ['bar', 'baz', 'bazz']]
+ s1 = [ const(''), const(','), const(' and ')]
+ s2 = [ [], [const('foo')], [const('bar'), const('baz'), const('bazz')]]
return s1[i].join(s2[j])
for i in range(3):
for j in range(3):
@@ -380,9 +363,9 @@
assert self.ll_to_string(res) == fn(i, j)
def fn(i, j):
- s1 = [ '', ',', ' and ']
- s2 = [ [], ['foo'], ['bar', 'baz', 'bazz']]
- s2[1].extend(['x'])
+ s1 = [ const(''), const(','), const(' and ')]
+ s2 = [ [], [const('foo')], [const('bar'), const('baz'), const('bazz')]]
+ s2[1].extend([const('x')])
return s1[i].join(s2[j])
for i in range(3):
for j in range(3):
@@ -390,73 +373,77 @@
assert self.ll_to_string(res) == fn(i, j)
def test_str_slice(self):
+ const = self.const
def fn():
- s = 'hello'
+ s = const('hello')
s1 = s[:3]
s2 = s[3:]
s3 = s[3:10]
- return s1+s2 == s and s2+s1 == 'lohel' and s1+s3 == s
+ return s1+s2 == s and s2+s1 == const('lohel') and s1+s3 == s
res = self.interpret(fn, ())
assert res
def test_str_slice_minusone(self):
+ const = self.const
def fn():
- s = 'hello'
- z = 'h'
+ s = const('hello')
+ z = const('h')
return s[:-1]+z[:-1]
res = self.interpret(fn, ())
- assert self.ll_to_string(res) == 'hell'
+ assert self.ll_to_string(res) == const('hell')
def test_strformat(self):
+ const = self.const
def percentS(s):
- return "before %s after" % (s,)
+ return const("before %s after") % (s,)
- res = self.interpret(percentS, ['1'])
- assert self.ll_to_string(res) == 'before 1 after'
+ res = self.interpret(percentS, [const('1')])
+ assert self.ll_to_string(res) == const('before 1 after')
def percentD(i):
return "bing %d bang" % (i,)
res = self.interpret(percentD, [23])
- assert self.ll_to_string(res) == 'bing 23 bang'
+ assert self.ll_to_string(res) == const('bing 23 bang')
def percentX(i):
- return "bing %x bang" % (i,)
+ return const("bing %x bang") % (i,)
res = self.interpret(percentX, [23])
- assert self.ll_to_string(res) == 'bing 17 bang'
+ assert self.ll_to_string(res) == const('bing 17 bang')
res = self.interpret(percentX, [-123])
- assert self.ll_to_string(res) == 'bing -7b bang'
+ assert self.ll_to_string(res) == const('bing -7b bang')
def percentO(i):
- return "bing %o bang" % (i,)
+ return const("bing %o bang") % (i,)
res = self.interpret(percentO, [23])
- assert self.ll_to_string(res) == 'bing 27 bang'
+ assert self.ll_to_string(res) == const('bing 27 bang')
res = self.interpret(percentO, [-123])
- assert self.ll_to_string(res) == 'bing -173 bang'
+ assert self.ll_to_string(res) == const('bing -173 bang')
def moreThanOne(s, d, x, o):
- return "string: %s decimal: %d hex: %x oct: %o" % (s, d, x, o)
+ return const("string: %s decimal: %d hex: %x oct: %o") % (s, d, x, o)
- args = 'a', 2, 3, 4
+ args = const('a'), 2, 3, 4
res = self.interpret(moreThanOne, list(args))
assert self.ll_to_string(res) == moreThanOne(*args)
def test_strformat_nontuple(self):
+ const = self.const
def percentD(i):
- return "before %d after" % i
+ return const("before %d after") % i
res = self.interpret(percentD, [1])
- assert self.ll_to_string(res) == 'before 1 after'
+ assert self.ll_to_string(res) == const('before 1 after')
def percentS(i):
- return "before %s after" % i
+ return const("before %s after") % i
- res = self.interpret(percentS, ['D'])
- assert self.ll_to_string(res) == 'before D after'
+ res = self.interpret(percentS, [const('D')])
+ assert self.ll_to_string(res) == const('before D after')
def test_strformat_instance(self):
class C:
@@ -532,52 +519,58 @@
assert res == expected
def test_split(self):
+ const = self.const
def fn(i):
- s = ['', '0.1.2.4.8', '.1.2', '1.2.', '.1.2.4.'][i]
- l = s.split('.')
+ s = [const(''), const('0.1.2.4.8'), const('.1.2'), const('1.2.'), const('.1.2.4.')][i]
+ l = s.split(const('.'))
sum = 0
for num in l:
if len(num):
- sum += ord(num) - ord('0')
+ sum += ord(num) - ord(const('0'))
return sum + len(l) * 100
for i in range(5):
res = self.interpret(fn, [i])
assert res == fn(i)
def test_contains(self):
+ const = self.const
+ constchar = self.constchar
def fn(i):
- s = 'Hello world'
- return chr(i) in s
+ s = const('Hello world')
+ return constchar(i) in s
for i in range(256):
res = self.interpret(fn, [i])#, view=i==42)
assert res == fn(i)
def test_replace(self):
+ const = self.const
def fn(c1, c2):
- s = 'abbccc'
+ s = const('abbccc')
s = s.replace(c1, c2)
res = 0
for c in s:
if c == c2:
res += 1
return res
- res = self.interpret(fn, ['a', 'c'])
+ res = self.interpret(fn, [const('a'), const('c')])
assert res == 4
- res = self.interpret(fn, ['c', 'b'])
+ res = self.interpret(fn, [const('c'), const('b')])
assert res == 5
def test_replace_TyperError(self):
+ const = self.const
def fn():
- s = 'abbccc'
- s = s.replace('a', 'baz')
+ s = const('abbccc')
+ s = s.replace(const('a'), const('baz'))
raises(TyperError, self.interpret, fn, ())
def fn():
- s = 'abbccc'
- s = s.replace('abb', 'c')
+ s = const('abbccc')
+ s = s.replace(const('abb'), const('c'))
raises(TyperError, self.interpret, fn, ())
def test_int(self):
- s1 = [ '42', '01001', 'abc', 'ABC', '4aBc', ' 12ef ', '+42', 'foo', '42foo', '42.1', '', '+ 42']
+ const = self.const
+ s1 = [ const('42'), const('01001'), const('abc'), const('ABC'), const('4aBc'), const(' 12ef '), const('+42'), const('foo'), const('42foo'), const('42.1'), const(''), const('+ 42')]
def fn(i, base):
s = s1[i]
res = int(s, base)
@@ -593,7 +586,8 @@
assert res == expected
def test_int_valueerror(self):
- s1 = ['42g', '?', '+', '+ ']
+ const = self.const
+ s1 = [const('42g'), const('?'), const('+'), const('+ ')]
def fn(i):
try:
return int(s1[i])
@@ -607,7 +601,8 @@
assert res == -654
def test_float(self):
- f = ['', ' ', '0', '1', '-1.5', '1.5E2', '2.5e-1', ' 0 ', '?']
+ const = self.const
+ f = [const(''), const(' '), const('0'), const('1'), const('-1.5'), const('1.5E2'), const('2.5e-1'), const(' 0 '), const('?')]
def fn(i):
s = f[i]
return float(s)
@@ -622,34 +617,38 @@
assert res == expected
def test_char_mul_n(self):
+ const = self.const
def f(c, n):
return c*n
- res = self.interpret(f, ['a', 4])
+ res = self.interpret(f, [const('a'), 4])
assert self.ll_to_string(res) == 'a'*4
- res = self.interpret(f, ['a', 0])
+ res = self.interpret(f, [const('a'), 0])
assert self.ll_to_string(res) == ""
def test_char_mul_negative(self):
+ const = self.const
def f(c):
return c * -3
- res = self.interpret(f, ['a'])
+ res = self.interpret(f, [const('a')])
assert self.ll_to_string(res) == ''
def test_n_mul_char(self):
+ const = self.const
def f(c, n):
return n*c
- res = self.interpret(f, ['a', 4])
+ res = self.interpret(f, [const('a'), 4])
assert self.ll_to_string(res) == 'a'*4
- res = self.interpret(f, ['a', 0])
+ res = self.interpret(f, [const('a'), 0])
assert self.ll_to_string(res) == ""
def test_hash(self):
+ const = self.const
def fn(i):
if i == 0:
- s = ''
+ s = const('')
else:
- s = "xxx"
+ s = const("xxx")
return hash(s)
res = self.interpret(fn, [0])
assert res == self.EMPTY_STRING_HASH
@@ -657,45 +656,52 @@
assert typeOf(res) == Signed
def test_call_str_on_string(self):
+ const = self.const
def fn(i):
- s = "x" * i
- return str(s)
+ s = const("x") * i
+ return const(s)
res = self.interpret(fn, [3])
assert self.ll_to_string(res) == 'xxx'
def test_count_char(self):
+ const = self.const
def fn(i):
- s = "".join(["abcasd"] * i)
- return s.count("a") + s.count("a", 2) + s.count("b", 1, 6)
+ s = const("").join([const("abcasd")] * i)
+ return s.count(const("a")) + s.count(const("a"), 2) + \
+ s.count(const("b"), 1, 6)
res = self.interpret(fn, [4])
assert res == 8 + 7 + 1
def test_count(self):
+ const = self.const
def fn(i):
- s = "".join(["abcabsd"] * i)
+ s = const("").join([const("abcabsd")] * i)
one = i / i # confuse the annotator
- return (s.count("abc") + "abcde".count("") +
- "abcda".count("a" * one))
+ return (s.count(const("abc")) + const("abcde").count(const("")) +
+ const("abcda").count(const("a") * one))
res = self.interpret(fn, [4])
assert res == 4 + 6 + 2
def test_count_overlapping_occurences(self):
+ const = self.const
def fn():
- return 'ababa'.count('aba')
+ return const('ababa').count(const('aba'))
res = self.interpret(fn, [])
assert res == 1
def test_hlstr(self):
+ const = self.const
from pypy.rpython.annlowlevel import hlstr
def f(s):
- return "*"+hlstr(s)+"*" == "*abba*"
+ return const("*")+const(hlstr(s))+const("*") == const("*abba*")
- res = self.interpret(f, [self.string_to_ll("abba")])
+ res = self.interpret(f, [self.string_to_ll(const("abba"))])
assert res
def test_getitem_exc(self):
+ const = self.const
def f(x):
- s = "z"
+ s = const("z")
return s[x]
res = self.interpret(f, [0])
@@ -708,13 +714,13 @@
assert False
def f(x):
- s = "z"
+ s = const("z")
try:
return s[x]
except IndexError:
- return 'X'
+ return const('X')
except Exception:
- return ' '
+ return const(' ')
res = self.interpret(f, [0])
assert res == 'z'
@@ -722,11 +728,11 @@
assert res == 'X'
def f(x):
- s = "z"
+ s = const("z")
try:
return s[x]
except Exception:
- return ' '
+ return const(' ')
res = self.interpret(f, [0])
assert res == 'z'
@@ -734,11 +740,11 @@
assert res == ' '
def f(x):
- s = "z"
+ s = const("z")
try:
return s[x]
except ValueError:
- return ' '
+ return const(' ')
res = self.interpret(f, [0])
assert res == 'z'
@@ -750,10 +756,11 @@
assert False
def test_fold_concat(self):
+ const = self.const
def g(tail):
- return "head"+tail
+ return const("head")+tail
def f():
- return g("tail")
+ return g(const("tail"))
from pypy import conftest
t, typer, fgraph = self.gengraph(f, [], backendopt=True)
@@ -779,6 +786,10 @@
res = interpret(g, [-2])
assert res._obj.value == 42
+class BaseTestRstr(AbstractTestRstr):
+ const = str
+ constchar = chr
+
class TestLLtype(BaseTestRstr, LLRtypeMixin):
EMPTY_STRING_HASH = -1
Modified: pypy/dist/pypy/rpython/test/test_runicode.py
==============================================================================
--- pypy/dist/pypy/rpython/test/test_runicode.py (original)
+++ pypy/dist/pypy/rpython/test/test_runicode.py Fri Nov 9 13:53:02 2007
@@ -1,19 +1,193 @@
-from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin
+from pypy.rpython.test.tool import LLRtypeMixin, OORtypeMixin
+from pypy.rpython.test.test_rstr import AbstractTestRstr
+import py
-class BaseTestRUnicode(BaseRtypingTest):
- def test_simple(self):
- def f(n):
- if n % 2 == 0:
- x = 'xxx'
- else:
- x = u'x\u221Ex'
- return x[n]
-
- for i in range(0, 3):
- res = self.interpret(f, [i])
- assert res == f(i)
+# ====> test_rstr.py
+
+class BaseTestRUnicode(AbstractTestRstr):
+ const = unicode
+ constchar = unichr
+
+ def test_unicode_explicit_conv(self):
+ def f(x):
+ return unicode(x)
+
+ for v in ['x', u'x']:
+ res = self.interpret(f, [v])
+ assert self.ll_to_unicode(res) == v
+
+ def f(x):
+ if x > 1:
+ y = const('yxx')
+ else:
+ y = const('xx')
+ return unicode(y)
+
+ const = str
+ assert self.ll_to_unicode(self.interpret(f, [1])) == f(1)
+
+ def f(x):
+ if x > 1:
+ y = const('yxx')
+ else:
+ y = const('xx')
+ return unicode(y)
+
+ # a copy, because llinterp caches functions
+
+ const = unicode
+ assert self.ll_to_unicode(self.interpret(f, [1])) == f(1)
+
+ def test_str_unicode_const(self):
+ def f():
+ return str(u'xxx')
+
+ assert self.ll_to_string(self.interpret(f, [])) == 'xxx'
+
+ def test_conversion_errors(self):
+ py.test.skip("do we want this test to pass?")
+ def f(x):
+ if x:
+ string = '\x80\x81'
+ uni = u'\x80\x81'
+ else:
+ string = '\x82\x83'
+ uni = u'\x83\x84\x84'
+ try:
+ str(uni)
+ except UnicodeEncodeError:
+ pass
+ else:
+ return -1
+ try:
+ unicode(string)
+ except UnicodeDecodeError:
+ return len(string) + len(uni)
+ else:
+ return -2
+ assert f(True) == 4
+ assert f(False) == 5
+ res = self.interpret(f, [True])
+ assert res == 4
+
+
+ def test_str_unicode_nonconst(self):
+ def f(x):
+ y = u'xxx' + unichr(x)
+ return str(y)
+
+ assert self.ll_to_string(self.interpret(f, [38])) == f(38)
+ self.interpret_raises(UnicodeEncodeError, f, [1234])
+
+ def test_unicode_encode(self):
+ def f(x):
+ y = u'xxx'
+ return (y + unichr(x)).encode('ascii')
+
+ assert self.ll_to_string(self.interpret(f, [38])) == f(38)
+
+ def test_unicode_encode_error(self):
+ def f(x):
+ y = u'xxx'
+ try:
+ x = (y + unichr(x)).encode('ascii')
+ return len(x)
+ except UnicodeEncodeError:
+ return -1
+
+ assert self.interpret(f, [38]) == f(38)
+ assert self.interpret(f, [138]) == f(138)
+
+ def test_unicode_decode(self):
+ def f(x):
+ y = 'xxx'
+ return (y + chr(x)).decode('ascii')
+
+ assert self.ll_to_string(self.interpret(f, [38])) == f(38)
+
+ def test_unicode_decode_error(self):
+ def f(x):
+ y = 'xxx'
+ try:
+ x = (y + chr(x)).decode('ascii')
+ return len(x)
+ except UnicodeDecodeError:
+ return -1
+
+ assert self.interpret(f, [38]) == f(38)
+ assert self.interpret(f, [138]) == f(138)
+
+
+ def test_unichar_const(self):
+ def fn(c):
+ return c
+ assert self.interpret(fn, [u'\u03b1']) == u'\u03b1'
+
+ def test_unichar_eq(self):
+ def fn(c1, c2):
+ return c1 == c2
+ assert self.interpret(fn, [u'\u03b1', u'\u03b1']) == True
+ assert self.interpret(fn, [u'\u03b1', u'\u03b2']) == False
+
+ def test_unichar_ord(self):
+ def fn(c):
+ return ord(c)
+ assert self.interpret(fn, [u'\u03b1']) == ord(u'\u03b1')
+
+ def test_unichar_hash(self):
+ def fn(c):
+ d = {c: 42}
+ return d[c]
+ assert self.interpret(fn, [u'\u03b1']) == 42
+
+ def test_convert_char_to_unichar(self):
+ def g(c):
+ return ord(c)
+ def fn(n):
+ if n < 0:
+ c = unichr(-n)
+ else:
+ c = chr(n)
+ return g(c)
+ assert self.interpret(fn, [65]) == 65
+ assert self.interpret(fn, [-5555]) == 5555
+
+ def test_char_unichar_eq(self):
+ def fn(c1, c2):
+ return c1 == c2
+ assert self.interpret(fn, [u'(', '(']) == True
+ assert self.interpret(fn, [u'\u1028', '(']) == False
+ assert self.interpret(fn, ['(', u'(']) == True
+ assert self.interpret(fn, ['(', u'\u1028']) == False
+
+ def test_char_unichar_eq_2(self):
+ def fn(c1):
+ return c1 == 'X'
+ assert self.interpret(fn, [u'(']) == False
+ assert self.interpret(fn, [u'\u1058']) == False
+ assert self.interpret(fn, [u'X']) == True
+
+ def unsupported(self):
+ py.test.skip("not supported")
+
+ test_char_isxxx = unsupported
+ test_upper = unsupported
+ test_lower = unsupported
+ test_strformat = unsupported
+ test_strformat_instance = unsupported
+ test_strformat_nontuple = unsupported
+ test_percentformat_instance = unsupported
+ test_percentformat_tuple = unsupported
+ test_percentformat_list = unsupported
+ test_int = unsupported
+ test_int_valueerror = unsupported
+ test_float = unsupported
+ test_hlstr = unsupported
class TestLLtype(BaseTestRUnicode, LLRtypeMixin):
- pass
+ EMPTY_STRING_HASH = -1
+
+class TestOOtype(BaseTestRUnicode, OORtypeMixin):
+ EMPTY_STRING_HASH = 0
Modified: pypy/dist/pypy/rpython/test/tool.py
==============================================================================
--- pypy/dist/pypy/rpython/test/tool.py (original)
+++ pypy/dist/pypy/rpython/test/tool.py Fri Nov 9 13:53:02 2007
@@ -34,10 +34,17 @@
def ll_to_string(self, s):
return ''.join(s.chars)
+ def ll_to_unicode(self, s):
+ return u''.join(s.chars)
+
def string_to_ll(self, s):
from pypy.rpython.module.support import LLSupport
return LLSupport.to_rstr(s)
+ def unicode_to_ll(self, s):
+ from pypy.rpython.module.support import LLSupport
+ return LLSupport.to_runicode(s)
+
def ll_to_list(self, l):
r = []
items = l.ll_items()
@@ -75,6 +82,8 @@
def ll_to_string(self, s):
return s._str
+ ll_to_unicode = ll_to_string
+
def string_to_ll(self, s):
from pypy.rpython.module.support import OOSupport
return OOSupport.to_rstr(s)
Modified: pypy/dist/pypy/translator/test/test_geninterp.py
==============================================================================
--- pypy/dist/pypy/translator/test/test_geninterp.py (original)
+++ pypy/dist/pypy/translator/test/test_geninterp.py Fri Nov 9 13:53:02 2007
@@ -44,6 +44,9 @@
def import_sys_func():
import sys
return sys.__name__
+
+ def unicode_test(x):
+ return unicode(x, 'ascii')
"""
def setup_class(cls):
@@ -275,3 +278,9 @@
fn = self.build_interpfunc(snippet.t_neg_long)
result = fn()
assert result == -132L and type(result) is long
+
+ def test_unicode_with_encoding(self):
+ fn = self.build_interpfunc(snippet.unicode_test)
+ result = fn("abc")
+ assert result == u"abc" and type(result) is unicode
+
More information about the Pypy-commit mailing list