[pypy-commit] pypy default: Issue #2857
arigo
pypy.commits at gmail.com
Sat Jul 14 03:47:49 EDT 2018
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r94866:fb05c07c73c5
Date: 2018-07-14 09:46 +0200
http://bitbucket.org/pypy/pypy/changeset/fb05c07c73c5/
Log: Issue #2857
Fix .casefold() in some cases, for py3.5
diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py
--- a/rpython/rlib/unicodedata/generate_unicodedb.py
+++ b/rpython/rlib/unicodedata/generate_unicodedb.py
@@ -913,8 +913,17 @@
casefolds = {}
for code, char in table.enum_chars():
- if char.casefolding and char.casefolding != [char.lower]:
- casefolds[code] = char.casefolding
+ full_casefold = char.casefolding
+ if full_casefold is None:
+ full_casefold = [code]
+ full_lower = char.lower
+ if full_lower is None:
+ full_lower = code
+ # if we don't write anything into the file, then the RPython
+ # program would compute the result 'full_lower' instead.
+ # Is that the right answer?
+ if full_casefold != [full_lower]:
+ casefolds[code] = full_casefold
writeDict(outfile, '_casefolds', casefolds, base_mod)
print >> outfile, '''
diff --git a/rpython/rlib/unicodedata/test/test_unicodedata.py b/rpython/rlib/unicodedata/test/test_unicodedata.py
--- a/rpython/rlib/unicodedata/test/test_unicodedata.py
+++ b/rpython/rlib/unicodedata/test/test_unicodedata.py
@@ -148,3 +148,15 @@
def test_changed_in_version_8(self):
assert unicodedb_6_2_0.toupper_full(0x025C) == [0x025C]
assert unicodedb_8_0_0.toupper_full(0x025C) == [0xA7AB]
+
+ def test_casefold(self):
+ # returns None when we have no special casefolding rule,
+ # which means that tolower_full() should be used instead
+ assert unicodedb_8_0_0.casefold_lookup(0x1000) == None
+ assert unicodedb_8_0_0.casefold_lookup(0x0061) == None
+ assert unicodedb_8_0_0.casefold_lookup(0x0041) == None
+ # a case where casefold() != lower()
+ assert unicodedb_8_0_0.casefold_lookup(0x00DF) == [ord('s'), ord('s')]
+ # returns the argument itself, and not None, in rare cases
+ # where tolower_full() would return something different
+ assert unicodedb_8_0_0.casefold_lookup(0x13A0) == [0x13A0]
diff --git a/rpython/rlib/unicodedata/unicodedb_8_0_0.py b/rpython/rlib/unicodedata/unicodedb_8_0_0.py
--- a/rpython/rlib/unicodedata/unicodedb_8_0_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_8_0_0.py
@@ -21307,6 +21307,92 @@
return code
_casefolds = {
+5024: [5024],
+5025: [5025],
+5026: [5026],
+5027: [5027],
+5028: [5028],
+5029: [5029],
+5030: [5030],
+5031: [5031],
+5032: [5032],
+5033: [5033],
+5034: [5034],
+5035: [5035],
+5036: [5036],
+5037: [5037],
+5038: [5038],
+5039: [5039],
+5040: [5040],
+5041: [5041],
+5042: [5042],
+5043: [5043],
+5044: [5044],
+5045: [5045],
+5046: [5046],
+5047: [5047],
+5048: [5048],
+5049: [5049],
+5050: [5050],
+5051: [5051],
+5052: [5052],
+5053: [5053],
+5054: [5054],
+5055: [5055],
+5056: [5056],
+5057: [5057],
+5058: [5058],
+5059: [5059],
+5060: [5060],
+5061: [5061],
+5062: [5062],
+5063: [5063],
+5064: [5064],
+5065: [5065],
+5066: [5066],
+5067: [5067],
+5068: [5068],
+5069: [5069],
+5070: [5070],
+5071: [5071],
+5072: [5072],
+5073: [5073],
+5074: [5074],
+5075: [5075],
+5076: [5076],
+5077: [5077],
+5078: [5078],
+5079: [5079],
+5080: [5080],
+5081: [5081],
+5082: [5082],
+5083: [5083],
+5084: [5084],
+5085: [5085],
+5086: [5086],
+5087: [5087],
+5088: [5088],
+5089: [5089],
+5090: [5090],
+5091: [5091],
+5092: [5092],
+5093: [5093],
+5094: [5094],
+5095: [5095],
+5096: [5096],
+5097: [5097],
+5098: [5098],
+5099: [5099],
+5100: [5100],
+5101: [5101],
+5102: [5102],
+5103: [5103],
+5104: [5104],
+5105: [5105],
+5106: [5106],
+5107: [5107],
+5108: [5108],
+5109: [5109],
5112: [5104],
5113: [5105],
5114: [5106],
diff --git a/rpython/rlib/unicodedata/unicodedb_9_0_0.py b/rpython/rlib/unicodedata/unicodedb_9_0_0.py
--- a/rpython/rlib/unicodedata/unicodedb_9_0_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_9_0_0.py
@@ -24430,6 +24430,92 @@
return code
_casefolds = {
+5024: [5024],
+5025: [5025],
+5026: [5026],
+5027: [5027],
+5028: [5028],
+5029: [5029],
+5030: [5030],
+5031: [5031],
+5032: [5032],
+5033: [5033],
+5034: [5034],
+5035: [5035],
+5036: [5036],
+5037: [5037],
+5038: [5038],
+5039: [5039],
+5040: [5040],
+5041: [5041],
+5042: [5042],
+5043: [5043],
+5044: [5044],
+5045: [5045],
+5046: [5046],
+5047: [5047],
+5048: [5048],
+5049: [5049],
+5050: [5050],
+5051: [5051],
+5052: [5052],
+5053: [5053],
+5054: [5054],
+5055: [5055],
+5056: [5056],
+5057: [5057],
+5058: [5058],
+5059: [5059],
+5060: [5060],
+5061: [5061],
+5062: [5062],
+5063: [5063],
+5064: [5064],
+5065: [5065],
+5066: [5066],
+5067: [5067],
+5068: [5068],
+5069: [5069],
+5070: [5070],
+5071: [5071],
+5072: [5072],
+5073: [5073],
+5074: [5074],
+5075: [5075],
+5076: [5076],
+5077: [5077],
+5078: [5078],
+5079: [5079],
+5080: [5080],
+5081: [5081],
+5082: [5082],
+5083: [5083],
+5084: [5084],
+5085: [5085],
+5086: [5086],
+5087: [5087],
+5088: [5088],
+5089: [5089],
+5090: [5090],
+5091: [5091],
+5092: [5092],
+5093: [5093],
+5094: [5094],
+5095: [5095],
+5096: [5096],
+5097: [5097],
+5098: [5098],
+5099: [5099],
+5100: [5100],
+5101: [5101],
+5102: [5102],
+5103: [5103],
+5104: [5104],
+5105: [5105],
+5106: [5106],
+5107: [5107],
+5108: [5108],
+5109: [5109],
5112: [5104],
5113: [5105],
5114: [5106],
More information about the pypy-commit mailing list