[pypy-commit] pypy py3.3: Unicodedb: Add support for Aliases.
amauryfa
noreply at buildbot.pypy.org
Mon Mar 16 01:16:26 CET 2015
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3.3
Changeset: r76391:06f9a5ad6287
Date: 2015-03-16 01:15 +0100
http://bitbucket.org/pypy/pypy/changeset/06f9a5ad6287/
Log: Unicodedb: Add support for Aliases.
diff too long, truncating to 2000 out of 172399 lines
diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -75,7 +75,7 @@
class UCD(W_Root):
def __init__(self, unicodedb):
- self._lookup = unicodedb.lookup
+ self._lookup = unicodedb.lookup_with_alias
self._lookup_named_sequence = unicodedb.lookup_named_sequence
self._name = unicodedb.name
self._decimal = unicodedb.decimal
diff --git a/pypy/module/unicodedata/test/test_unicodedata.py b/pypy/module/unicodedata/test/test_unicodedata.py
--- a/pypy/module/unicodedata/test/test_unicodedata.py
+++ b/pypy/module/unicodedata/test/test_unicodedata.py
@@ -107,6 +107,27 @@
import unicodedata
raises(TypeError, unicodedata.bidirectional, 'xx')
+ def test_aliases(self):
+ import unicodedata
+ aliases = [
+ ('LATIN CAPITAL LETTER GHA', 0x01A2),
+ ('LATIN SMALL LETTER GHA', 0x01A3),
+ ('KANNADA LETTER LLLA', 0x0CDE),
+ ('LAO LETTER FO FON', 0x0E9D),
+ ('LAO LETTER FO FAY', 0x0E9F),
+ ('LAO LETTER RO', 0x0EA3),
+ ('LAO LETTER LO', 0x0EA5),
+ ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
+ ('YI SYLLABLE ITERATION MARK', 0xA015),
+ ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
+ ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5)
+ ]
+ for alias, codepoint in aliases:
+ name = unicodedata.name(chr(codepoint))
+ assert name != alias
+ assert unicodedata.lookup(alias) == unicodedata.lookup(name)
+ raises(KeyError, unicodedata.ucd_3_2_0.lookup, alias)
+
def test_named_sequences(self):
import unicodedata
sequences = [
diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py
--- a/rpython/rlib/unicodedata/generate_unicodedb.py
+++ b/rpython/rlib/unicodedata/generate_unicodedb.py
@@ -88,10 +88,11 @@
class UnicodeData(object):
# we use this range of PUA_15 to store name aliases and named sequences
NAME_ALIASES_START = 0xF0000
- NAMED_SEQUENCES_START = 0xF0100
+ NAMED_SEQUENCES_START = 0xF0200
def __init__(self):
self.table = [None] * (MAXUNICODE + 1)
+ self.aliases = []
self.named_sequences = []
def add_char(self, code, char):
@@ -149,6 +150,12 @@
self.table[code].canonical_decomp = result
return self.table[code].canonical_decomp
+ def add_alias(self, name, char):
+ pua_index = self.NAME_ALIASES_START + len(self.aliases)
+ self.aliases.append((name, char))
+ # also store the name in the PUA 1
+ self.table[pua_index].name = name
+
def add_named_sequence(self, name, chars):
pua_index = self.NAMED_SEQUENCES_START + len(self.named_sequences)
self.named_sequences.append((name, chars))
@@ -262,6 +269,16 @@
table.get_canonical_decomposition(code)
table.get_compat_decomposition(code)
+ # Name aliases
+ for line in files['name_aliases']:
+ line = line.strip()
+ if not line or line.startswith('#'):
+ continue
+ items = line.split(';')
+ char = int(items[0], 16)
+ name = items[1]
+ table.add_alias(name, char)
+
# Named sequences
for line in files['named_sequences']:
line = line.strip()
@@ -786,7 +803,21 @@
return None
''' % dict(start=table.NAMED_SEQUENCES_START)
-
+ # aliases
+ print >> outfile, '_name_aliases = ['
+ for name, char in table.aliases:
+ print >> outfile, "%s," % (char,)
+ print >> outfile, ']'
+ print >> outfile, '''
+
+def lookup_with_alias(name):
+ code = lookup(name)
+ if 0 <= code - %(start)s < len(_name_aliases):
+ return _name_aliases[code - %(start)s]
+ else:
+ return code
+''' % dict(start=table.NAME_ALIASES_START)
+
def main():
import sys
diff --git a/rpython/rlib/unicodedata/unicodedb_3_2_0.py b/rpython/rlib/unicodedata/unicodedb_3_2_0.py
--- a/rpython/rlib/unicodedata/unicodedb_3_2_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_3_2_0.py
@@ -493,6 +493,7 @@
6679: None,
6682: None,
6680: None,
+983050: None,
6322: None,
6321: None,
6387: None,
@@ -3668,7 +3669,7 @@
9965: None,
4346: None,
4345: None,
-983646: None,
+983902: None,
11520: None,
11521: None,
11546: None,
@@ -4439,13 +4440,14 @@
69815: None,
69811: None,
69812: None,
+983042: None,
3261: None,
3313: None,
3260: None,
3314: None,
3298: None,
3299: None,
-983688: None,
+983944: None,
43272: None,
43269: None,
43268: None,
@@ -4559,44 +4561,44 @@
68102: None,
68098: None,
68099: None,
-983667: None,
-983652: None,
-983653: None,
-983655: None,
-983654: None,
-983657: None,
-983659: None,
-983679: None,
-983647: None,
-983648: None,
-983650: None,
-983649: None,
-983680: None,
-983674: None,
-983671: None,
-983661: None,
-983651: None,
-983666: None,
-983656: None,
-983668: None,
-983670: None,
-983669: None,
-983673: None,
-983678: None,
-983676: None,
-983677: None,
-983662: None,
-983663: None,
-983665: None,
-983664: None,
-983658: None,
-983660: None,
-983675: None,
-983672: None,
-983685: None,
-983682: None,
-983683: None,
-983684: None,
+983923: None,
+983908: None,
+983909: None,
+983911: None,
+983910: None,
+983913: None,
+983915: None,
+983935: None,
+983903: None,
+983904: None,
+983906: None,
+983905: None,
+983936: None,
+983930: None,
+983927: None,
+983917: None,
+983907: None,
+983922: None,
+983912: None,
+983924: None,
+983926: None,
+983925: None,
+983929: None,
+983934: None,
+983932: None,
+983933: None,
+983918: None,
+983919: None,
+983921: None,
+983920: None,
+983914: None,
+983916: None,
+983931: None,
+983928: None,
+983941: None,
+983938: None,
+983939: None,
+983940: None,
6109: None,
6627: None,
6643: None,
@@ -4640,16 +4642,20 @@
6631: None,
6647: None,
6640: None,
-983687: None,
-983681: None,
-983686: None,
+983943: None,
+983937: None,
+983942: None,
+983044: None,
+983043: None,
+983046: None,
+983045: None,
68413: None,
68415: None,
68412: None,
68414: None,
-983296: None,
-983322: None,
-983324: None,
+983552: None,
+983578: None,
+983580: None,
570: None,
42802: None,
11373: None,
@@ -4664,26 +4670,27 @@
42862: None,
42796: None,
42798: None,
-983306: None,
-983304: None,
-983330: None,
-983332: None,
-983326: None,
-983328: None,
+983562: None,
+983560: None,
+983586: None,
+983588: None,
+983582: None,
+983584: None,
582: None,
-983298: None,
-983302: None,
-983300: None,
+983554: None,
+983558: None,
+983556: None,
42788: None,
42786: None,
42858: None,
+983040: None,
577: None,
11367: None,
11381: None,
42790: None,
-983308: None,
-983336: None,
-983338: None,
+983564: None,
+983592: None,
+983594: None,
42873: None,
42875: None,
42877: None,
@@ -4692,7 +4699,7 @@
42886: None,
42860: None,
584: None,
-983340: None,
+983596: None,
11369: None,
42818: None,
42816: None,
@@ -4701,16 +4708,16 @@
11360: None,
42824: None,
11362: None,
-983342: None,
+983598: None,
11374: None,
-983344: None,
+983600: None,
7930: None,
7932: None,
42826: None,
42828: None,
-983312: None,
-983316: None,
-983314: None,
+983568: None,
+983572: None,
+983570: None,
42830: None,
42834: None,
42836: None,
@@ -4721,11 +4728,11 @@
42842: None,
588: None,
11364: None,
-983346: None,
+983602: None,
42814: None,
42844: None,
11390: None,
-983318: None,
+983574: None,
42891: None,
7838: None,
586: None,
@@ -4740,11 +4747,11 @@
581: None,
42792: None,
580: None,
-983352: None,
-983320: None,
-983354: None,
-983348: None,
-983350: None,
+983608: None,
+983576: None,
+983610: None,
+983604: None,
+983606: None,
42846: None,
42856: None,
42850: None,
@@ -4791,9 +4798,9 @@
7460: None,
7547: None,
7550: None,
-983297: None,
-983323: None,
-983325: None,
+983553: None,
+983579: None,
+983581: None,
7567: None,
11365: None,
42803: None,
@@ -4819,18 +4826,18 @@
7839: None,
567: None,
42865: None,
-983307: None,
-983305: None,
-983331: None,
-983333: None,
+983563: None,
+983561: None,
+983587: None,
+983589: None,
11384: None,
-983327: None,
-983329: None,
+983583: None,
+983585: None,
7570: None,
583: None,
-983299: None,
-983303: None,
-983301: None,
+983555: None,
+983559: None,
+983557: None,
42789: None,
42787: None,
7563: None,
@@ -4840,16 +4847,17 @@
7534: None,
7554: None,
7555: None,
+983041: None,
578: None,
11368: None,
11382: None,
42791: None,
-983310: None,
-983334: None,
-983335: None,
-983309: None,
-983337: None,
-983339: None,
+983566: None,
+983590: None,
+983591: None,
+983565: None,
+983593: None,
+983595: None,
7574: None,
42874: None,
42876: None,
@@ -4859,7 +4867,7 @@
42887: None,
7548: None,
42861: None,
-983341: None,
+983597: None,
585: None,
11370: None,
42819: None,
@@ -4870,27 +4878,27 @@
11361: None,
42825: None,
7557: None,
-983343: None,
+983599: None,
7836: None,
7837: None,
42866: None,
7535: None,
7558: None,
-983345: None,
+983601: None,
7931: None,
7933: None,
42867: None,
565: None,
7536: None,
7559: None,
-983311: None,
+983567: None,
42868: None,
42827: None,
42829: None,
11386: None,
-983313: None,
-983317: None,
-983315: None,
+983569: None,
+983573: None,
+983571: None,
42831: None,
7571: None,
7575: None,
@@ -4909,7 +4917,7 @@
7538: None,
7561: None,
589: None,
-983347: None,
+983603: None,
8580: None,
42815: None,
7572: None,
@@ -4918,7 +4926,7 @@
7540: None,
7562: None,
575: None,
-983319: None,
+983575: None,
42892: None,
7573: None,
7454: None,
@@ -4948,11 +4956,11 @@
7432: None,
11385: None,
42793: None,
-983353: None,
-983321: None,
-983355: None,
-983349: None,
-983351: None,
+983609: None,
+983577: None,
+983611: None,
+983605: None,
+983607: None,
7577: None,
7531: None,
42872: None,
@@ -5661,7 +5669,7 @@
764: None,
42765: None,
42760: None,
-983689: None,
+983945: None,
42770: None,
42769: None,
42764: None,
@@ -6483,6 +6491,7 @@
65047: None,
65046: None,
65096: None,
+983049: None,
65048: None,
65044: None,
9915: None,
@@ -7220,30 +7229,30 @@
43699: None,
43705: None,
3064: None,
-983358: None,
-983378: None,
-983374: None,
-983356: None,
-983379: None,
-983368: None,
-983371: None,
-983370: None,
-983365: None,
-983363: None,
-983357: None,
-983361: None,
-983373: None,
-983359: None,
-983364: None,
-983367: None,
-983372: None,
-983377: None,
-983375: None,
-983376: None,
-983362: None,
-983360: None,
-983369: None,
-983366: None,
+983614: None,
+983634: None,
+983630: None,
+983612: None,
+983635: None,
+983624: None,
+983627: None,
+983626: None,
+983621: None,
+983619: None,
+983613: None,
+983617: None,
+983629: None,
+983615: None,
+983620: None,
+983623: None,
+983628: None,
+983633: None,
+983631: None,
+983632: None,
+983618: None,
+983616: None,
+983625: None,
+983622: None,
3063: None,
3059: None,
3062: None,
@@ -7253,272 +7262,272 @@
3066: None,
3024: None,
3065: None,
-983402: None,
-983409: None,
-983412: None,
-983407: None,
-983408: None,
-983403: None,
-983404: None,
-983410: None,
-983411: None,
-983405: None,
-983406: None,
-983622: None,
-983629: None,
-983632: None,
-983627: None,
-983628: None,
-983623: None,
-983624: None,
-983630: None,
-983631: None,
-983625: None,
-983626: None,
-983578: None,
-983585: None,
-983588: None,
-983583: None,
-983584: None,
-983579: None,
-983580: None,
-983586: None,
-983587: None,
-983581: None,
-983582: None,
-983380: None,
-983387: None,
-983390: None,
-983385: None,
-983386: None,
-983381: None,
-983382: None,
-983388: None,
-983389: None,
-983633: None,
-983634: None,
+983658: None,
+983665: None,
+983668: None,
+983663: None,
+983664: None,
+983659: None,
+983660: None,
+983666: None,
+983667: None,
+983661: None,
+983662: None,
+983878: None,
+983885: None,
+983888: None,
+983883: None,
+983884: None,
+983879: None,
+983880: None,
+983886: None,
+983887: None,
+983881: None,
+983882: None,
+983834: None,
+983841: None,
+983844: None,
+983839: None,
+983840: None,
+983835: None,
+983836: None,
+983842: None,
+983843: None,
+983837: None,
+983838: None,
+983636: None,
+983643: None,
+983646: None,
983641: None,
+983642: None,
+983637: None,
+983638: None,
983644: None,
+983645: None,
+983889: None,
+983890: None,
+983897: None,
+983900: None,
+983895: None,
+983896: None,
+983891: None,
+983892: None,
+983898: None,
+983899: None,
+983893: None,
+983894: None,
983639: None,
983640: None,
-983635: None,
-983636: None,
-983642: None,
-983643: None,
-983637: None,
-983638: None,
-983383: None,
-983384: None,
-983512: None,
-983519: None,
-983522: None,
-983517: None,
-983518: None,
-983513: None,
-983514: None,
-983545: None,
-983552: None,
-983555: None,
-983550: None,
-983551: None,
-983546: None,
-983547: None,
-983534: None,
-983541: None,
-983544: None,
-983539: None,
-983540: None,
-983535: None,
-983536: None,
-983542: None,
-983543: None,
-983537: None,
-983538: None,
-983553: None,
-983554: None,
-983548: None,
-983549: None,
-983520: None,
-983521: None,
-983515: None,
-983516: None,
-983479: None,
-983486: None,
-983489: None,
-983484: None,
-983485: None,
-983480: None,
-983481: None,
-983487: None,
-983488: None,
-983482: None,
-983483: None,
-983457: None,
-983464: None,
-983467: None,
-983462: None,
-983463: None,
-983391: None,
-983398: None,
-983401: None,
-983396: None,
-983397: None,
-983392: None,
-983393: None,
-983399: None,
-983400: None,
-983394: None,
-983395: None,
-983458: None,
-983459: None,
-983435: None,
-983442: None,
-983445: None,
-983440: None,
-983441: None,
-983436: None,
-983437: None,
-983567: None,
-983574: None,
-983577: None,
-983572: None,
-983573: None,
-983568: None,
-983569: None,
-983575: None,
-983576: None,
-983570: None,
-983571: None,
-983443: None,
-983444: None,
-983438: None,
-983439: None,
-983465: None,
-983466: None,
-983460: None,
-983461: None,
-983413: None,
-983420: None,
-983423: None,
-983418: None,
-983419: None,
-983414: None,
-983415: None,
-983421: None,
-983422: None,
-983416: None,
-983417: None,
-983468: None,
-983475: None,
-983478: None,
-983473: None,
-983474: None,
-983469: None,
-983470: None,
-983476: None,
-983477: None,
-983471: None,
-983472: None,
-983501: None,
-983508: None,
-983511: None,
-983506: None,
-983507: None,
-983502: None,
-983503: None,
-983509: None,
-983510: None,
-983556: None,
-983563: None,
-983566: None,
-983561: None,
-983562: None,
-983557: None,
-983558: None,
-983564: None,
-983565: None,
-983559: None,
-983560: None,
-983504: None,
-983505: None,
-983611: None,
-983618: None,
-983621: None,
-983616: None,
-983617: None,
-983589: None,
-983596: None,
-983599: None,
-983594: None,
-983595: None,
-983590: None,
-983591: None,
-983597: None,
-983598: None,
-983645: None,
-983592: None,
-983593: None,
-983612: None,
-983613: None,
-983619: None,
-983620: None,
-983600: None,
-983607: None,
-983610: None,
-983605: None,
-983606: None,
-983601: None,
-983602: None,
-983608: None,
-983609: None,
-983603: None,
-983604: None,
-983614: None,
-983615: None,
-983446: None,
-983453: None,
-983456: None,
-983451: None,
-983452: None,
-983447: None,
-983448: None,
-983454: None,
-983455: None,
-983424: None,
-983431: None,
-983434: None,
-983429: None,
-983430: None,
-983425: None,
-983426: None,
-983432: None,
-983433: None,
-983427: None,
-983428: None,
-983449: None,
-983450: None,
-983523: None,
-983530: None,
-983533: None,
-983528: None,
-983529: None,
-983524: None,
-983525: None,
-983531: None,
-983532: None,
-983526: None,
-983527: None,
-983490: None,
-983497: None,
-983500: None,
-983495: None,
-983496: None,
-983491: None,
-983492: None,
-983498: None,
-983499: None,
-983493: None,
-983494: None,
+983768: None,
+983775: None,
+983778: None,
+983773: None,
+983774: None,
+983769: None,
+983770: None,
+983801: None,
+983808: None,
+983811: None,
+983806: None,
+983807: None,
+983802: None,
+983803: None,
+983790: None,
+983797: None,
+983800: None,
+983795: None,
+983796: None,
+983791: None,
+983792: None,
+983798: None,
+983799: None,
+983793: None,
+983794: None,
+983809: None,
+983810: None,
+983804: None,
+983805: None,
+983776: None,
+983777: None,
+983771: None,
+983772: None,
+983735: None,
+983742: None,
+983745: None,
+983740: None,
+983741: None,
+983736: None,
+983737: None,
+983743: None,
+983744: None,
+983738: None,
+983739: None,
+983713: None,
+983720: None,
+983723: None,
+983718: None,
+983719: None,
+983647: None,
+983654: None,
+983657: None,
+983652: None,
+983653: None,
+983648: None,
+983649: None,
+983655: None,
+983656: None,
+983650: None,
+983651: None,
+983714: None,
+983715: None,
+983691: None,
+983698: None,
+983701: None,
+983696: None,
+983697: None,
+983692: None,
+983693: None,
+983823: None,
+983830: None,
+983833: None,
+983828: None,
+983829: None,
+983824: None,
+983825: None,
+983831: None,
+983832: None,
+983826: None,
+983827: None,
+983699: None,
+983700: None,
+983694: None,
+983695: None,
+983721: None,
+983722: None,
+983716: None,
+983717: None,
+983669: None,
+983676: None,
+983679: None,
+983674: None,
+983675: None,
+983670: None,
+983671: None,
+983677: None,
+983678: None,
+983672: None,
+983673: None,
+983724: None,
+983731: None,
+983734: None,
+983729: None,
+983730: None,
+983725: None,
+983726: None,
+983732: None,
+983733: None,
+983727: None,
+983728: None,
+983757: None,
+983764: None,
+983767: None,
+983762: None,
+983763: None,
+983758: None,
+983759: None,
+983765: None,
+983766: None,
+983812: None,
+983819: None,
+983822: None,
+983817: None,
+983818: None,
+983813: None,
+983814: None,
+983820: None,
+983821: None,
+983815: None,
+983816: None,
+983760: None,
+983761: None,
+983867: None,
+983874: None,
+983877: None,
+983872: None,
+983873: None,
+983845: None,
+983852: None,
+983855: None,
+983850: None,
+983851: None,
+983846: None,
+983847: None,
+983853: None,
+983854: None,
+983901: None,
+983848: None,
+983849: None,
+983868: None,
+983869: None,
+983875: None,
+983876: None,
+983856: None,
+983863: None,
+983866: None,
+983861: None,
+983862: None,
+983857: None,
+983858: None,
+983864: None,
+983865: None,
+983859: None,
+983860: None,
+983870: None,
+983871: None,
+983702: None,
+983709: None,
+983712: None,
+983707: None,
+983708: None,
+983703: None,
+983704: None,
+983710: None,
+983711: None,
+983680: None,
+983687: None,
+983690: None,
+983685: None,
+983686: None,
+983681: None,
+983682: None,
+983688: None,
+983689: None,
+983683: None,
+983684: None,
+983705: None,
+983706: None,
+983779: None,
+983786: None,
+983789: None,
+983784: None,
+983785: None,
+983780: None,
+983781: None,
+983787: None,
+983788: None,
+983782: None,
+983783: None,
+983746: None,
+983753: None,
+983756: None,
+983751: None,
+983752: None,
+983747: None,
+983748: None,
+983754: None,
+983755: None,
+983749: None,
+983750: None,
3061: None,
3196: None,
3193: None,
@@ -7624,6 +7633,7 @@
9928: None,
3947: None,
3948: None,
+983047: None,
4048: None,
4052: None,
4051: None,
@@ -8358,6 +8368,7 @@
11055: None,
11038: None,
11825: None,
+983048: None,
}
_code_by_name = {
}
@@ -8844,6 +8855,7 @@
'BUGINESE VOWEL SIGN I': None,
'BUGINESE VOWEL SIGN O': None,
'BUGINESE VOWEL SIGN U': None,
+'BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS': None,
'CANADIAN SYLLABICS AAY': None,
'CANADIAN SYLLABICS AY': None,
'CANADIAN SYLLABICS BEAVER DENE L': None,
@@ -12790,6 +12802,7 @@
'KAITHI VOWEL SIGN O': None,
'KAITHI VOWEL SIGN U': None,
'KAITHI VOWEL SIGN UU': None,
+'KANNADA LETTER LLLA': None,
'KANNADA SIGN AVAGRAHA': None,
'KANNADA SIGN JIHVAMULIYA': None,
'KANNADA SIGN NUKTA': None,
@@ -12994,6 +13007,10 @@
'KHMER VOWEL SIGN AAM': None,
'KHMER VOWEL SIGN COENG QA': None,
'KHMER VOWEL SIGN OM': None,
+'LAO LETTER FO FAY': None,
+'LAO LETTER FO FON': None,
+'LAO LETTER LO': None,
+'LAO LETTER RO': None,
'LARGE ONE DOT OVER TWO DOTS PUNCTUATION': None,
'LARGE ONE RING OVER TWO RINGS PUNCTUATION': None,
'LARGE TWO DOTS OVER ONE DOT PUNCTUATION': None,
@@ -13028,6 +13045,7 @@
'LATIN CAPITAL LETTER EGYPTOLOGICAL AIN': None,
'LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF': None,
'LATIN CAPITAL LETTER ET': None,
+'LATIN CAPITAL LETTER GHA': None,
'LATIN CAPITAL LETTER GLOTTAL STOP': None,
'LATIN CAPITAL LETTER H WITH DESCENDER': None,
'LATIN CAPITAL LETTER HALF H': None,
@@ -13191,6 +13209,7 @@
'LATIN SMALL LETTER F WITH MIDDLE TILDE': None,
'LATIN SMALL LETTER F WITH PALATAL HOOK': None,
'LATIN SMALL LETTER G WITH PALATAL HOOK': None,
+'LATIN SMALL LETTER GHA': None,
'LATIN SMALL LETTER GLOTTAL STOP': None,
'LATIN SMALL LETTER H WITH DESCENDER': None,
'LATIN SMALL LETTER HALF H': None,
@@ -14834,6 +14853,7 @@
'PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET': None,
'PRESENTATION FORM FOR VERTICAL QUESTION MARK': None,
'PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET': None,
+'PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET': None,
'PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET': None,
'PRESENTATION FORM FOR VERTICAL SEMICOLON': None,
'QUINCUNX': None,
@@ -15975,6 +15995,7 @@
'THUNDER CLOUD AND RAIN': None,
'TIBETAN LETTER KKA': None,
'TIBETAN LETTER RRA': None,
+'TIBETAN MARK BKA- SHOG GI MGO RGYAN': None,
'TIBETAN MARK BSKA- SHOG GI MGO RGYAN': None,
'TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA': None,
'TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA': None,
@@ -16709,6 +16730,7 @@
'WHITE VERTICAL ELLIPSE': None,
'WHITE VERY SMALL SQUARE': None,
'WORD SEPARATOR MIDDLE DOT': None,
+'YI SYLLABLE ITERATION MARK': None,
}
_cjk_prefix = "CJK UNIFIED IDEOGRAPH-"
@@ -21257,8 +21279,19 @@
def lookup_named_sequence(code):
- if 0 <= code - 983296 < len(_named_sequences):
- return _named_sequences[code - 983296]
+ if 0 <= code - 983552 < len(_named_sequences):
+ return _named_sequences[code - 983552]
else:
return None
+_name_aliases = [
+]
+
+
+def lookup_with_alias(name):
+ code = lookup(name)
+ if 0 <= code - 983040 < len(_name_aliases):
+ return _name_aliases[code - 983040]
+ else:
+ return code
+
diff --git a/rpython/rlib/unicodedata/unicodedb_5_2_0.py b/rpython/rlib/unicodedata/unicodedb_5_2_0.py
--- a/rpython/rlib/unicodedata/unicodedb_5_2_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_5_2_0.py
@@ -190,6 +190,7 @@
'\x07 KEFULA'
'\x08 KEMBANG'
'\x0e KISIM5 TIMES '
+'\x02 L'
'\x05 LACA'
'\x11 LAGAB TIMES ASH2'
'\x11 LAGAR OVER LAGAR'
@@ -1349,7 +1350,7 @@
'\x04CHOR'
'\tCHOSEONG '
'\x06CHRIVI'
-'\rCHROMA SYNAFI'
+'\x07CHROMA '
'\rCHRYSANTHEMUM'
'\x07CHU CAN'
'\x05CHULA'
@@ -1377,6 +1378,7 @@
'\x02CK'
'\x07CK MARK'
')CK-TILTED SHADOWED WHITE RIGHTWARDS ARROW'
+'\x04CKET'
'\x05CKING'
'\x06CKNESS'
'\x07CKWISE '
@@ -1932,6 +1934,7 @@
'\x07ER THAN'
'\x08ER TRUTH'
'\x08ERAL URN'
+'\x0cERATION MARK'
'\x0bERCENT SIGN'
'\x07ERCIAL '
'\tERCIAL AT'
@@ -2251,6 +2254,7 @@
'\x04GHWA'
'\x02GI'
'\x07GI GUNU'
+'\x0cGI MGO RGYAN'
'\x04GIBA'
'\x06GICAL '
'\x04GIDA'
@@ -2972,7 +2976,7 @@
'\nK2 PLUS BU'
'\x02K4'
'\x02KA'
-'\x10KA- SHOG YIG MGO'
+'\tKA- SHOG '
'\x04KAAF'
'\x03KAB'
'\tKABA TENU'
@@ -3003,6 +3007,7 @@
'\x06KAYAH '
'\x07KAYANNA'
'\x12KBAR ISOLATED FORM'
+'\x04KCET'
'\x02KE'
'\x06KE PHO'
'\x16KEEPING STILL MOUNTAIN'
@@ -3187,8 +3192,8 @@
'\nLENDED YUS'
'\x0bLENGTH MARK'
'\x05LENIS'
+'\x0eLENTICULAR BRA'
'\x12LENTICULAR BRACKET'
-'\x12LENTICULAR BRAKCET'
'\x04LEPH'
'\x0cLER CONSTANT'
'\x08LESS SHA'
@@ -3276,7 +3281,6 @@
'\x0eLMOST EQUAL TO'
'\x0fLMOST EQUAL TO '
'\x02LO'
-'\x04LO L'
'\rLOCATION SIGN'
'\x08LOCATIVE'
'\tLOCKWISE '
@@ -5041,6 +5045,7 @@
'\x0bSYMBOL FOR '
'\x12SYMMETRIC SWAPPING'
'\x16SYMPTOTICALLY EQUAL TO'
+'\x06SYNAFI'
'\x07SYNAGMA'
'\rSYNDESMOS NEO'
'\tSYNTHETON'
@@ -5598,6 +5603,7 @@
'\x13VARIANT WITH SQUARE'
'\x13VARIATION INDICATOR'
'\x0bVARYS ICHOS'
+'\x05VASIS'
'\x13VASTNESS OR WASTING'
'\x03VAV'
'\x07VAV YOD'
@@ -5907,6 +5913,7 @@
'\x06YGISMA'
'\x02YI'
'\x08YIDDISH '
+'\x07YIG MGO'
'\x08YIG MGO '
'\x0fYIG MGO MDUN MA'
'\x14YIG MGO PHUR SHAD MA'
@@ -5998,6313 +6005,6313 @@
'\x05ZYGOS'
)
_charnodes =[70758,
- -54016,
+ -54013,
-1,
132371,
- 28772,
+ 28800,
-1,
197694,
- 78442,
+ 78444,
-1,
262727,
- 136032,
+ 136035,
-1,
327957,
- 202072,
+ 202075,
-1,
393238,
- 282255,
+ 282270,
-1,
-65529,
- 347791,
+ 347806,
195071,
-65528,
- 409765,
+ 409767,
195070,
-65527,
- 472618,
+ 472620,
195069,
-65526,
- 535808,
+ 535811,
195068,
-65525,
- 599704,
+ 599707,
195067,
-65524,
- 660875,
+ 660878,
195066,
-65523,
- 726360,
+ 726363,
195065,
-65522,
- 791844,
+ 791847,
195064,
-65521,
- 857309,
+ 857312,
195063,
-65520,
- 922776,
+ 922779,
195062,
-65519,
- 988192,
+ 988195,
195061,
-65518,
- 1053651,
+ 1053654,
195060,
-65517,
- 1119075,
+ 1119078,
195059,
-65516,
- 1184446,
+ 1184449,
195058,
-65515,
- 1249830,
+ 1249833,
195057,
-1,
- 1315238,
+ 1315241,
195056,
1507367,
- 344229,
+ 344231,
-1,
-65512,
- 1461903,
+ 1461918,
195055,
-65511,
- 1523877,
+ 1523879,
195054,
-65510,
- 1586730,
+ 1586732,
195053,
-65509,
- 1649920,
+ 1649923,
195052,
-65508,
- 1713816,
+ 1713819,
195051,
-65507,
- 1774987,
+ 1774990,
195050,
-65506,
- 1840472,
+ 1840475,
195049,
-65505,
- 1905956,
+ 1905959,
195048,
-65504,
- 1971421,
+ 1971424,
195047,
-65503,
- 2036888,
+ 2036891,
195046,
-65502,
- 2102304,
+ 2102307,
195045,
-65501,
- 2167763,
+ 2167766,
195044,
-65500,
- 2233187,
+ 2233190,
195043,
-65499,
- 2298558,
+ 2298561,
195042,
-65498,
- 2363942,
+ 2363945,
195041,
-1,
- 2429350,
+ 2429353,
195040,
2621496,
- 1455658,
+ 1455660,
-1,
-65495,
- 2576015,
+ 2576030,
195039,
-65494,
- 2637989,
+ 2637991,
195038,
-65493,
- 2700842,
+ 2700844,
195037,
-65492,
- 2764032,
+ 2764035,
195036,
-65491,
- 2827928,
+ 2827931,
195035,
-65490,
- 2889099,
+ 2889102,
195034,
-65489,
- 2954584,
+ 2954587,
195033,
-65488,
- 3020068,
+ 3020071,
195032,
-65487,
- 3085533,
+ 3085536,
195031,
-65486,
- 3151000,
+ 3151003,
195030,
-65485,
- 3216416,
+ 3216419,
195029,
-65484,
- 3281875,
+ 3281878,
195028,
-65483,
- 3347299,
+ 3347302,
195027,
-65482,
- 3412670,
+ 3412673,
195026,
-65481,
- 3478054,
+ 3478057,
195025,
-1,
- 3543462,
+ 3543465,
195024,
3735625,
- 2567424,
+ 2567427,
-1,
-65478,
- 3690127,
+ 3690142,
195023,
-65477,
- 3752101,
+ 3752103,
195022,
-65476,
- 3814954,
+ 3814956,
195021,
-65475,
- 3878144,
+ 3878147,
195020,
-65474,
- 3942040,
+ 3942043,
195019,
-65473,
- 4003211,
+ 4003214,
195018,
-65472,
- 4068696,
+ 4068699,
195017,
-65471,
- 4134180,
+ 4134183,
195016,
-65470,
- 4199645,
+ 4199648,
195015,
-65469,
- 4265112,
+ 4265115,
195014,
-65468,
- 4330528,
+ 4330531,
195013,
-65467,
- 4395987,
+ 4395990,
195012,
-65466,
- 4461411,
+ 4461414,
195011,
-65465,
- 4526782,
+ 4526785,
195010,
-65464,
- 4592166,
+ 4592169,
195009,
-1,
- 4657574,
+ 4657577,
195008,
4849754,
- 3679896,
+ 3679899,
-1,
-65461,
- 4804239,
+ 4804254,
195007,
-65460,
- 4866213,
+ 4866215,
195006,
-65459,
- 4929066,
+ 4929068,
195005,
-65458,
- 4992256,
+ 4992259,
195004,
-65457,
- 5056152,
+ 5056155,
195003,
-65456,
- 5117323,
+ 5117326,
195002,
-65455,
- 5182808,
+ 5182811,
195001,
-65454,
- 5248292,
+ 5248295,
195000,
-65453,
- 5313757,
+ 5313760,
194999,
-65452,
- 5379224,
+ 5379227,
194998,
-65451,
- 5444640,
+ 5444643,
194997,
-65450,
- 5510099,
+ 5510102,
194996,
-65449,
- 5575523,
+ 5575526,
194995,
-65448,
- 5640894,
+ 5640897,
194994,
-65447,
- 5706278,
+ 5706281,
194993,
-1,
- 5771686,
+ 5771689,
194992,
5963883,
- 4789643,
+ 4789646,
-1,
-65444,
- 5918351,
+ 5918366,
194991,
-65443,
- 5980325,
+ 5980327,
194990,
-65442,
- 6043178,
+ 6043180,
194989,
-65441,
- 6106368,
+ 6106371,
194988,
-65440,
- 6170264,
+ 6170267,
194987,
-65439,
- 6231435,
+ 6231438,
194986,
-65438,
- 6296920,
+ 6296923,
194985,
-65437,
- 6362404,
+ 6362407,
194984,
-65436,
- 6427869,
+ 6427872,
194983,
-65435,
- 6493336,
+ 6493339,
194982,
-65434,
- 6558752,
+ 6558755,
194981,
-65433,
- 6624211,
+ 6624214,
194980,
-65432,
- 6689635,
+ 6689638,
194979,
-65431,
- 6755006,
+ 6755009,
194978,
-65430,
- 6820390,
+ 6820393,
194977,
-1,
- 6885798,
+ 6885801,
194976,
7078012,
- 5903704,
+ 5903707,
-1,
-65427,
- 7032463,
+ 7032478,
194975,
-65426,
- 7094437,
+ 7094439,
194974,
-65425,
- 7157290,
+ 7157292,
194973,
-65424,
- 7220480,
+ 7220483,
194972,
-65423,
- 7284376,
+ 7284379,
194971,
-65422,
- 7345547,
+ 7345550,
194970,
-65421,
- 7411032,
+ 7411035,
194969,
-65420,
- 7476516,
+ 7476519,
194968,
-65419,
- 7541981,
+ 7541984,
194967,
-65418,
- 7607448,
+ 7607451,
194966,
-65417,
- 7672864,
+ 7672867,
194965,
-65416,
- 7738323,
+ 7738326,
194964,
-65415,
- 7803747,
+ 7803750,
194963,
-65414,
- 7869118,
+ 7869121,
194962,
-65413,
- 7934502,
+ 7934505,
194961,
-1,
- 7999910,
+ 7999913,
194960,
8192141,
- 7017764,
+ 7017767,
-1,
-65410,
- 8146575,
+ 8146590,
194959,
-65409,
- 8208549,
+ 8208551,
194958,
-65408,
- 8271402,
+ 8271404,
194957,
-65407,
- 8334592,
+ 8334595,
194956,
-65406,
- 8398488,
+ 8398491,
194955,
-65405,
- 8459659,
+ 8459662,
194954,
-65404,
- 8525144,
+ 8525147,
194953,
-65403,
- 8590628,
+ 8590631,
194952,
-65402,
- 8656093,
+ 8656096,
194951,
-65401,
- 8721560,
+ 8721563,
194950,
-65400,
- 8786976,
+ 8786979,
194949,
-65399,
- 8852435,
+ 8852438,
194948,
-65398,
- 8917859,
+ 8917862,
194947,
-65397,
- 8983230,
+ 8983233,
194946,
-65396,
- 9048614,
+ 9048617,
194945,
-1,
- 9114022,
+ 9114025,
194944,
9306270,
- 8131805,
+ 8131808,
-1,
-65393,
- 9260687,
+ 9260702,
194943,
-65392,
- 9322661,
+ 9322663,
194942,
-65391,
- 9385514,
+ 9385516,
194941,
-65390,
- 9448704,
+ 9448707,
194940,
-65389,
- 9512600,
+ 9512603,
194939,
-65388,
- 9573771,
+ 9573774,
194938,
-65387,
- 9639256,
+ 9639259,
194937,
-65386,
- 9704740,
+ 9704743,
194936,
-65385,
- 9770205,
+ 9770208,
194935,
-65384,
- 9835672,
+ 9835675,
194934,
-65383,
- 9901088,
+ 9901091,
194933,
-65382,
- 9966547,
+ 9966550,
194932,
-65381,
- 10031971,
+ 10031974,
194931,
-65380,
- 10097342,
+ 10097345,
194930,
-65379,
- 10162726,
+ 10162729,
194929,
-1,
- 10228134,
+ 10228137,
194928,
10420399,
- 9245848,
+ 9245851,
-1,
-65376,
- 10374799,
+ 10374814,
194927,
-65375,
- 10436773,
+ 10436775,
194926,
-65374,
- 10499626,
+ 10499628,
194925,
-65373,
- 10562816,
+ 10562819,
194924,
-65372,
- 10626712,
+ 10626715,
194923,
-65371,
- 10687883,
+ 10687886,
194922,
-65370,
- 10753368,
+ 10753371,
194921,
-65369,
- 10818852,
+ 10818855,
194920,
-65368,
- 10884317,
+ 10884320,
194919,
-65367,
- 10949784,
+ 10949787,
194918,
-65366,
- 11015200,
+ 11015203,
More information about the pypy-commit
mailing list