[pypy-commit] pypy default: Issue #2857

arigo pypy.commits at gmail.com
Sat Jul 14 03:47:49 EDT 2018


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r94866:fb05c07c73c5
Date: 2018-07-14 09:46 +0200
http://bitbucket.org/pypy/pypy/changeset/fb05c07c73c5/

Log:	Issue #2857

	Fix .casefold() in some cases, for py3.5

diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py
--- a/rpython/rlib/unicodedata/generate_unicodedb.py
+++ b/rpython/rlib/unicodedata/generate_unicodedb.py
@@ -913,8 +913,17 @@
 
     casefolds = {}
     for code, char in table.enum_chars():
-        if char.casefolding and char.casefolding != [char.lower]:
-            casefolds[code] = char.casefolding
+        full_casefold = char.casefolding
+        if full_casefold is None:
+            full_casefold = [code]
+        full_lower = char.lower
+        if full_lower is None:
+            full_lower = code
+        # If we don't write anything into the file, then the RPython
+        # program computes the result [full_lower] instead.  Only emit
+        # an entry when that fallback would be the wrong answer.
+        if full_casefold != [full_lower]:
+            casefolds[code] = full_casefold
     writeDict(outfile, '_casefolds', casefolds, base_mod)
     print >> outfile, '''
 
diff --git a/rpython/rlib/unicodedata/test/test_unicodedata.py b/rpython/rlib/unicodedata/test/test_unicodedata.py
--- a/rpython/rlib/unicodedata/test/test_unicodedata.py
+++ b/rpython/rlib/unicodedata/test/test_unicodedata.py
@@ -148,3 +148,15 @@
     def test_changed_in_version_8(self):
         assert unicodedb_6_2_0.toupper_full(0x025C) == [0x025C]
         assert unicodedb_8_0_0.toupper_full(0x025C) == [0xA7AB]
+
+    def test_casefold(self):
+        # returns None when we have no special casefolding rule,
+        # which means that tolower_full() should be used instead
+        assert unicodedb_8_0_0.casefold_lookup(0x1000) == None
+        assert unicodedb_8_0_0.casefold_lookup(0x0061) == None
+        assert unicodedb_8_0_0.casefold_lookup(0x0041) == None
+        # a case where casefold() != lower()
+        assert unicodedb_8_0_0.casefold_lookup(0x00DF) == [ord('s'), ord('s')]
+        # returns the argument itself, and not None, in rare cases
+        # where tolower_full() would return something different
+        assert unicodedb_8_0_0.casefold_lookup(0x13A0) == [0x13A0]
diff --git a/rpython/rlib/unicodedata/unicodedb_8_0_0.py b/rpython/rlib/unicodedata/unicodedb_8_0_0.py
--- a/rpython/rlib/unicodedata/unicodedb_8_0_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_8_0_0.py
@@ -21307,6 +21307,92 @@
         return code
 
 _casefolds = {
+5024: [5024],
+5025: [5025],
+5026: [5026],
+5027: [5027],
+5028: [5028],
+5029: [5029],
+5030: [5030],
+5031: [5031],
+5032: [5032],
+5033: [5033],
+5034: [5034],
+5035: [5035],
+5036: [5036],
+5037: [5037],
+5038: [5038],
+5039: [5039],
+5040: [5040],
+5041: [5041],
+5042: [5042],
+5043: [5043],
+5044: [5044],
+5045: [5045],
+5046: [5046],
+5047: [5047],
+5048: [5048],
+5049: [5049],
+5050: [5050],
+5051: [5051],
+5052: [5052],
+5053: [5053],
+5054: [5054],
+5055: [5055],
+5056: [5056],
+5057: [5057],
+5058: [5058],
+5059: [5059],
+5060: [5060],
+5061: [5061],
+5062: [5062],
+5063: [5063],
+5064: [5064],
+5065: [5065],
+5066: [5066],
+5067: [5067],
+5068: [5068],
+5069: [5069],
+5070: [5070],
+5071: [5071],
+5072: [5072],
+5073: [5073],
+5074: [5074],
+5075: [5075],
+5076: [5076],
+5077: [5077],
+5078: [5078],
+5079: [5079],
+5080: [5080],
+5081: [5081],
+5082: [5082],
+5083: [5083],
+5084: [5084],
+5085: [5085],
+5086: [5086],
+5087: [5087],
+5088: [5088],
+5089: [5089],
+5090: [5090],
+5091: [5091],
+5092: [5092],
+5093: [5093],
+5094: [5094],
+5095: [5095],
+5096: [5096],
+5097: [5097],
+5098: [5098],
+5099: [5099],
+5100: [5100],
+5101: [5101],
+5102: [5102],
+5103: [5103],
+5104: [5104],
+5105: [5105],
+5106: [5106],
+5107: [5107],
+5108: [5108],
+5109: [5109],
 5112: [5104],
 5113: [5105],
 5114: [5106],
diff --git a/rpython/rlib/unicodedata/unicodedb_9_0_0.py b/rpython/rlib/unicodedata/unicodedb_9_0_0.py
--- a/rpython/rlib/unicodedata/unicodedb_9_0_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_9_0_0.py
@@ -24430,6 +24430,92 @@
         return code
 
 _casefolds = {
+5024: [5024],
+5025: [5025],
+5026: [5026],
+5027: [5027],
+5028: [5028],
+5029: [5029],
+5030: [5030],
+5031: [5031],
+5032: [5032],
+5033: [5033],
+5034: [5034],
+5035: [5035],
+5036: [5036],
+5037: [5037],
+5038: [5038],
+5039: [5039],
+5040: [5040],
+5041: [5041],
+5042: [5042],
+5043: [5043],
+5044: [5044],
+5045: [5045],
+5046: [5046],
+5047: [5047],
+5048: [5048],
+5049: [5049],
+5050: [5050],
+5051: [5051],
+5052: [5052],
+5053: [5053],
+5054: [5054],
+5055: [5055],
+5056: [5056],
+5057: [5057],
+5058: [5058],
+5059: [5059],
+5060: [5060],
+5061: [5061],
+5062: [5062],
+5063: [5063],
+5064: [5064],
+5065: [5065],
+5066: [5066],
+5067: [5067],
+5068: [5068],
+5069: [5069],
+5070: [5070],
+5071: [5071],
+5072: [5072],
+5073: [5073],
+5074: [5074],
+5075: [5075],
+5076: [5076],
+5077: [5077],
+5078: [5078],
+5079: [5079],
+5080: [5080],
+5081: [5081],
+5082: [5082],
+5083: [5083],
+5084: [5084],
+5085: [5085],
+5086: [5086],
+5087: [5087],
+5088: [5088],
+5089: [5089],
+5090: [5090],
+5091: [5091],
+5092: [5092],
+5093: [5093],
+5094: [5094],
+5095: [5095],
+5096: [5096],
+5097: [5097],
+5098: [5098],
+5099: [5099],
+5100: [5100],
+5101: [5101],
+5102: [5102],
+5103: [5103],
+5104: [5104],
+5105: [5105],
+5106: [5106],
+5107: [5107],
+5108: [5108],
+5109: [5109],
 5112: [5104],
 5113: [5105],
 5114: [5106],


More information about the pypy-commit mailing list