[pypy-commit] pypy py3.3: Unicodedb: Add support for Aliases.

amauryfa noreply at buildbot.pypy.org
Mon Mar 16 01:16:26 CET 2015


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3.3
Changeset: r76391:06f9a5ad6287
Date: 2015-03-16 01:15 +0100
http://bitbucket.org/pypy/pypy/changeset/06f9a5ad6287/

Log:	Unicodedb: Add support for Aliases.

diff too long, truncating to 2000 out of 172399 lines

diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -75,7 +75,7 @@
 
 class UCD(W_Root):
     def __init__(self, unicodedb):
-        self._lookup = unicodedb.lookup
+        self._lookup = unicodedb.lookup_with_alias
         self._lookup_named_sequence = unicodedb.lookup_named_sequence
         self._name = unicodedb.name
         self._decimal = unicodedb.decimal
diff --git a/pypy/module/unicodedata/test/test_unicodedata.py b/pypy/module/unicodedata/test/test_unicodedata.py
--- a/pypy/module/unicodedata/test/test_unicodedata.py
+++ b/pypy/module/unicodedata/test/test_unicodedata.py
@@ -107,6 +107,27 @@
         import unicodedata
         raises(TypeError, unicodedata.bidirectional, 'xx')
 
+    def test_aliases(self):
+        import unicodedata
+        aliases = [
+            ('LATIN CAPITAL LETTER GHA', 0x01A2),
+            ('LATIN SMALL LETTER GHA', 0x01A3),
+            ('KANNADA LETTER LLLA', 0x0CDE),
+            ('LAO LETTER FO FON', 0x0E9D),
+            ('LAO LETTER FO FAY', 0x0E9F),
+            ('LAO LETTER RO', 0x0EA3),
+            ('LAO LETTER LO', 0x0EA5),
+            ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
+            ('YI SYLLABLE ITERATION MARK', 0xA015),
+            ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
+            ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5)
+        ]
+        for alias, codepoint in aliases:
+            name = unicodedata.name(chr(codepoint))
+            assert name != alias
+            assert unicodedata.lookup(alias) == unicodedata.lookup(name)
+            raises(KeyError, unicodedata.ucd_3_2_0.lookup, alias)
+
     def test_named_sequences(self):
         import unicodedata
         sequences = [
diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py
--- a/rpython/rlib/unicodedata/generate_unicodedb.py
+++ b/rpython/rlib/unicodedata/generate_unicodedb.py
@@ -88,10 +88,11 @@
 class UnicodeData(object):
     # we use this range of PUA_15 to store name aliases and named sequences
     NAME_ALIASES_START = 0xF0000
-    NAMED_SEQUENCES_START = 0xF0100
+    NAMED_SEQUENCES_START = 0xF0200
 
     def __init__(self):
         self.table = [None] * (MAXUNICODE + 1)
+        self.aliases = []
         self.named_sequences = []
 
     def add_char(self, code, char):
@@ -149,6 +150,12 @@
             self.table[code].canonical_decomp = result
         return self.table[code].canonical_decomp
 
+    def add_alias(self, name, char):
+        pua_index = self.NAME_ALIASES_START + len(self.aliases)
+        self.aliases.append((name, char))
+        # also store the name in the PUA_15 range (see NAME_ALIASES_START)
+        self.table[pua_index].name = name
+
     def add_named_sequence(self, name, chars):
         pua_index = self.NAMED_SEQUENCES_START + len(self.named_sequences)
         self.named_sequences.append((name, chars))
@@ -262,6 +269,16 @@
         table.get_canonical_decomposition(code)
         table.get_compat_decomposition(code)
 
+    # Name aliases
+    for line in files['name_aliases']:
+        line = line.strip()
+        if not line or line.startswith('#'):
+            continue
+        items = line.split(';')
+        char = int(items[0], 16)
+        name = items[1]
+        table.add_alias(name, char)
+
     # Named sequences
     for line in files['named_sequences']:
         line = line.strip()
@@ -786,7 +803,21 @@
         return None
 ''' % dict(start=table.NAMED_SEQUENCES_START)
     
-        
+    # aliases
+    print >> outfile, '_name_aliases = ['
+    for name, char in table.aliases:
+        print >> outfile, "%s," % (char,)
+    print >> outfile, ']'
+    print >> outfile, '''
+
+def lookup_with_alias(name):
+    code = lookup(name)
+    if 0 <= code - %(start)s < len(_name_aliases):
+        return _name_aliases[code - %(start)s]
+    else:
+        return code
+''' % dict(start=table.NAME_ALIASES_START)
+
 
 def main():
     import sys
diff --git a/rpython/rlib/unicodedata/unicodedb_3_2_0.py b/rpython/rlib/unicodedata/unicodedb_3_2_0.py
--- a/rpython/rlib/unicodedata/unicodedb_3_2_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_3_2_0.py
@@ -493,6 +493,7 @@
 6679: None,
 6682: None,
 6680: None,
+983050: None,
 6322: None,
 6321: None,
 6387: None,
@@ -3668,7 +3669,7 @@
 9965: None,
 4346: None,
 4345: None,
-983646: None,
+983902: None,
 11520: None,
 11521: None,
 11546: None,
@@ -4439,13 +4440,14 @@
 69815: None,
 69811: None,
 69812: None,
+983042: None,
 3261: None,
 3313: None,
 3260: None,
 3314: None,
 3298: None,
 3299: None,
-983688: None,
+983944: None,
 43272: None,
 43269: None,
 43268: None,
@@ -4559,44 +4561,44 @@
 68102: None,
 68098: None,
 68099: None,
-983667: None,
-983652: None,
-983653: None,
-983655: None,
-983654: None,
-983657: None,
-983659: None,
-983679: None,
-983647: None,
-983648: None,
-983650: None,
-983649: None,
-983680: None,
-983674: None,
-983671: None,
-983661: None,
-983651: None,
-983666: None,
-983656: None,
-983668: None,
-983670: None,
-983669: None,
-983673: None,
-983678: None,
-983676: None,
-983677: None,
-983662: None,
-983663: None,
-983665: None,
-983664: None,
-983658: None,
-983660: None,
-983675: None,
-983672: None,
-983685: None,
-983682: None,
-983683: None,
-983684: None,
+983923: None,
+983908: None,
+983909: None,
+983911: None,
+983910: None,
+983913: None,
+983915: None,
+983935: None,
+983903: None,
+983904: None,
+983906: None,
+983905: None,
+983936: None,
+983930: None,
+983927: None,
+983917: None,
+983907: None,
+983922: None,
+983912: None,
+983924: None,
+983926: None,
+983925: None,
+983929: None,
+983934: None,
+983932: None,
+983933: None,
+983918: None,
+983919: None,
+983921: None,
+983920: None,
+983914: None,
+983916: None,
+983931: None,
+983928: None,
+983941: None,
+983938: None,
+983939: None,
+983940: None,
 6109: None,
 6627: None,
 6643: None,
@@ -4640,16 +4642,20 @@
 6631: None,
 6647: None,
 6640: None,
-983687: None,
-983681: None,
-983686: None,
+983943: None,
+983937: None,
+983942: None,
+983044: None,
+983043: None,
+983046: None,
+983045: None,
 68413: None,
 68415: None,
 68412: None,
 68414: None,
-983296: None,
-983322: None,
-983324: None,
+983552: None,
+983578: None,
+983580: None,
 570: None,
 42802: None,
 11373: None,
@@ -4664,26 +4670,27 @@
 42862: None,
 42796: None,
 42798: None,
-983306: None,
-983304: None,
-983330: None,
-983332: None,
-983326: None,
-983328: None,
+983562: None,
+983560: None,
+983586: None,
+983588: None,
+983582: None,
+983584: None,
 582: None,
-983298: None,
-983302: None,
-983300: None,
+983554: None,
+983558: None,
+983556: None,
 42788: None,
 42786: None,
 42858: None,
+983040: None,
 577: None,
 11367: None,
 11381: None,
 42790: None,
-983308: None,
-983336: None,
-983338: None,
+983564: None,
+983592: None,
+983594: None,
 42873: None,
 42875: None,
 42877: None,
@@ -4692,7 +4699,7 @@
 42886: None,
 42860: None,
 584: None,
-983340: None,
+983596: None,
 11369: None,
 42818: None,
 42816: None,
@@ -4701,16 +4708,16 @@
 11360: None,
 42824: None,
 11362: None,
-983342: None,
+983598: None,
 11374: None,
-983344: None,
+983600: None,
 7930: None,
 7932: None,
 42826: None,
 42828: None,
-983312: None,
-983316: None,
-983314: None,
+983568: None,
+983572: None,
+983570: None,
 42830: None,
 42834: None,
 42836: None,
@@ -4721,11 +4728,11 @@
 42842: None,
 588: None,
 11364: None,
-983346: None,
+983602: None,
 42814: None,
 42844: None,
 11390: None,
-983318: None,
+983574: None,
 42891: None,
 7838: None,
 586: None,
@@ -4740,11 +4747,11 @@
 581: None,
 42792: None,
 580: None,
-983352: None,
-983320: None,
-983354: None,
-983348: None,
-983350: None,
+983608: None,
+983576: None,
+983610: None,
+983604: None,
+983606: None,
 42846: None,
 42856: None,
 42850: None,
@@ -4791,9 +4798,9 @@
 7460: None,
 7547: None,
 7550: None,
-983297: None,
-983323: None,
-983325: None,
+983553: None,
+983579: None,
+983581: None,
 7567: None,
 11365: None,
 42803: None,
@@ -4819,18 +4826,18 @@
 7839: None,
 567: None,
 42865: None,
-983307: None,
-983305: None,
-983331: None,
-983333: None,
+983563: None,
+983561: None,
+983587: None,
+983589: None,
 11384: None,
-983327: None,
-983329: None,
+983583: None,
+983585: None,
 7570: None,
 583: None,
-983299: None,
-983303: None,
-983301: None,
+983555: None,
+983559: None,
+983557: None,
 42789: None,
 42787: None,
 7563: None,
@@ -4840,16 +4847,17 @@
 7534: None,
 7554: None,
 7555: None,
+983041: None,
 578: None,
 11368: None,
 11382: None,
 42791: None,
-983310: None,
-983334: None,
-983335: None,
-983309: None,
-983337: None,
-983339: None,
+983566: None,
+983590: None,
+983591: None,
+983565: None,
+983593: None,
+983595: None,
 7574: None,
 42874: None,
 42876: None,
@@ -4859,7 +4867,7 @@
 42887: None,
 7548: None,
 42861: None,
-983341: None,
+983597: None,
 585: None,
 11370: None,
 42819: None,
@@ -4870,27 +4878,27 @@
 11361: None,
 42825: None,
 7557: None,
-983343: None,
+983599: None,
 7836: None,
 7837: None,
 42866: None,
 7535: None,
 7558: None,
-983345: None,
+983601: None,
 7931: None,
 7933: None,
 42867: None,
 565: None,
 7536: None,
 7559: None,
-983311: None,
+983567: None,
 42868: None,
 42827: None,
 42829: None,
 11386: None,
-983313: None,
-983317: None,
-983315: None,
+983569: None,
+983573: None,
+983571: None,
 42831: None,
 7571: None,
 7575: None,
@@ -4909,7 +4917,7 @@
 7538: None,
 7561: None,
 589: None,
-983347: None,
+983603: None,
 8580: None,
 42815: None,
 7572: None,
@@ -4918,7 +4926,7 @@
 7540: None,
 7562: None,
 575: None,
-983319: None,
+983575: None,
 42892: None,
 7573: None,
 7454: None,
@@ -4948,11 +4956,11 @@
 7432: None,
 11385: None,
 42793: None,
-983353: None,
-983321: None,
-983355: None,
-983349: None,
-983351: None,
+983609: None,
+983577: None,
+983611: None,
+983605: None,
+983607: None,
 7577: None,
 7531: None,
 42872: None,
@@ -5661,7 +5669,7 @@
 764: None,
 42765: None,
 42760: None,
-983689: None,
+983945: None,
 42770: None,
 42769: None,
 42764: None,
@@ -6483,6 +6491,7 @@
 65047: None,
 65046: None,
 65096: None,
+983049: None,
 65048: None,
 65044: None,
 9915: None,
@@ -7220,30 +7229,30 @@
 43699: None,
 43705: None,
 3064: None,
-983358: None,
-983378: None,
-983374: None,
-983356: None,
-983379: None,
-983368: None,
-983371: None,
-983370: None,
-983365: None,
-983363: None,
-983357: None,
-983361: None,
-983373: None,
-983359: None,
-983364: None,
-983367: None,
-983372: None,
-983377: None,
-983375: None,
-983376: None,
-983362: None,
-983360: None,
-983369: None,
-983366: None,
+983614: None,
+983634: None,
+983630: None,
+983612: None,
+983635: None,
+983624: None,
+983627: None,
+983626: None,
+983621: None,
+983619: None,
+983613: None,
+983617: None,
+983629: None,
+983615: None,
+983620: None,
+983623: None,
+983628: None,
+983633: None,
+983631: None,
+983632: None,
+983618: None,
+983616: None,
+983625: None,
+983622: None,
 3063: None,
 3059: None,
 3062: None,
@@ -7253,272 +7262,272 @@
 3066: None,
 3024: None,
 3065: None,
-983402: None,
-983409: None,
-983412: None,
-983407: None,
-983408: None,
-983403: None,
-983404: None,
-983410: None,
-983411: None,
-983405: None,
-983406: None,
-983622: None,
-983629: None,
-983632: None,
-983627: None,
-983628: None,
-983623: None,
-983624: None,
-983630: None,
-983631: None,
-983625: None,
-983626: None,
-983578: None,
-983585: None,
-983588: None,
-983583: None,
-983584: None,
-983579: None,
-983580: None,
-983586: None,
-983587: None,
-983581: None,
-983582: None,
-983380: None,
-983387: None,
-983390: None,
-983385: None,
-983386: None,
-983381: None,
-983382: None,
-983388: None,
-983389: None,
-983633: None,
-983634: None,
+983658: None,
+983665: None,
+983668: None,
+983663: None,
+983664: None,
+983659: None,
+983660: None,
+983666: None,
+983667: None,
+983661: None,
+983662: None,
+983878: None,
+983885: None,
+983888: None,
+983883: None,
+983884: None,
+983879: None,
+983880: None,
+983886: None,
+983887: None,
+983881: None,
+983882: None,
+983834: None,
+983841: None,
+983844: None,
+983839: None,
+983840: None,
+983835: None,
+983836: None,
+983842: None,
+983843: None,
+983837: None,
+983838: None,
+983636: None,
+983643: None,
+983646: None,
 983641: None,
+983642: None,
+983637: None,
+983638: None,
 983644: None,
+983645: None,
+983889: None,
+983890: None,
+983897: None,
+983900: None,
+983895: None,
+983896: None,
+983891: None,
+983892: None,
+983898: None,
+983899: None,
+983893: None,
+983894: None,
 983639: None,
 983640: None,
-983635: None,
-983636: None,
-983642: None,
-983643: None,
-983637: None,
-983638: None,
-983383: None,
-983384: None,
-983512: None,
-983519: None,
-983522: None,
-983517: None,
-983518: None,
-983513: None,
-983514: None,
-983545: None,
-983552: None,
-983555: None,
-983550: None,
-983551: None,
-983546: None,
-983547: None,
-983534: None,
-983541: None,
-983544: None,
-983539: None,
-983540: None,
-983535: None,
-983536: None,
-983542: None,
-983543: None,
-983537: None,
-983538: None,
-983553: None,
-983554: None,
-983548: None,
-983549: None,
-983520: None,
-983521: None,
-983515: None,
-983516: None,
-983479: None,
-983486: None,
-983489: None,
-983484: None,
-983485: None,
-983480: None,
-983481: None,
-983487: None,
-983488: None,
-983482: None,
-983483: None,
-983457: None,
-983464: None,
-983467: None,
-983462: None,
-983463: None,
-983391: None,
-983398: None,
-983401: None,
-983396: None,
-983397: None,
-983392: None,
-983393: None,
-983399: None,
-983400: None,
-983394: None,
-983395: None,
-983458: None,
-983459: None,
-983435: None,
-983442: None,
-983445: None,
-983440: None,
-983441: None,
-983436: None,
-983437: None,
-983567: None,
-983574: None,
-983577: None,
-983572: None,
-983573: None,
-983568: None,
-983569: None,
-983575: None,
-983576: None,
-983570: None,
-983571: None,
-983443: None,
-983444: None,
-983438: None,
-983439: None,
-983465: None,
-983466: None,
-983460: None,
-983461: None,
-983413: None,
-983420: None,
-983423: None,
-983418: None,
-983419: None,
-983414: None,
-983415: None,
-983421: None,
-983422: None,
-983416: None,
-983417: None,
-983468: None,
-983475: None,
-983478: None,
-983473: None,
-983474: None,
-983469: None,
-983470: None,
-983476: None,
-983477: None,
-983471: None,
-983472: None,
-983501: None,
-983508: None,
-983511: None,
-983506: None,
-983507: None,
-983502: None,
-983503: None,
-983509: None,
-983510: None,
-983556: None,
-983563: None,
-983566: None,
-983561: None,
-983562: None,
-983557: None,
-983558: None,
-983564: None,
-983565: None,
-983559: None,
-983560: None,
-983504: None,
-983505: None,
-983611: None,
-983618: None,
-983621: None,
-983616: None,
-983617: None,
-983589: None,
-983596: None,
-983599: None,
-983594: None,
-983595: None,
-983590: None,
-983591: None,
-983597: None,
-983598: None,
-983645: None,
-983592: None,
-983593: None,
-983612: None,
-983613: None,
-983619: None,
-983620: None,
-983600: None,
-983607: None,
-983610: None,
-983605: None,
-983606: None,
-983601: None,
-983602: None,
-983608: None,
-983609: None,
-983603: None,
-983604: None,
-983614: None,
-983615: None,
-983446: None,
-983453: None,
-983456: None,
-983451: None,
-983452: None,
-983447: None,
-983448: None,
-983454: None,
-983455: None,
-983424: None,
-983431: None,
-983434: None,
-983429: None,
-983430: None,
-983425: None,
-983426: None,
-983432: None,
-983433: None,
-983427: None,
-983428: None,
-983449: None,
-983450: None,
-983523: None,
-983530: None,
-983533: None,
-983528: None,
-983529: None,
-983524: None,
-983525: None,
-983531: None,
-983532: None,
-983526: None,
-983527: None,
-983490: None,
-983497: None,
-983500: None,
-983495: None,
-983496: None,
-983491: None,
-983492: None,
-983498: None,
-983499: None,
-983493: None,
-983494: None,
+983768: None,
+983775: None,
+983778: None,
+983773: None,
+983774: None,
+983769: None,
+983770: None,
+983801: None,
+983808: None,
+983811: None,
+983806: None,
+983807: None,
+983802: None,
+983803: None,
+983790: None,
+983797: None,
+983800: None,
+983795: None,
+983796: None,
+983791: None,
+983792: None,
+983798: None,
+983799: None,
+983793: None,
+983794: None,
+983809: None,
+983810: None,
+983804: None,
+983805: None,
+983776: None,
+983777: None,
+983771: None,
+983772: None,
+983735: None,
+983742: None,
+983745: None,
+983740: None,
+983741: None,
+983736: None,
+983737: None,
+983743: None,
+983744: None,
+983738: None,
+983739: None,
+983713: None,
+983720: None,
+983723: None,
+983718: None,
+983719: None,
+983647: None,
+983654: None,
+983657: None,
+983652: None,
+983653: None,
+983648: None,
+983649: None,
+983655: None,
+983656: None,
+983650: None,
+983651: None,
+983714: None,
+983715: None,
+983691: None,
+983698: None,
+983701: None,
+983696: None,
+983697: None,
+983692: None,
+983693: None,
+983823: None,
+983830: None,
+983833: None,
+983828: None,
+983829: None,
+983824: None,
+983825: None,
+983831: None,
+983832: None,
+983826: None,
+983827: None,
+983699: None,
+983700: None,
+983694: None,
+983695: None,
+983721: None,
+983722: None,
+983716: None,
+983717: None,
+983669: None,
+983676: None,
+983679: None,
+983674: None,
+983675: None,
+983670: None,
+983671: None,
+983677: None,
+983678: None,
+983672: None,
+983673: None,
+983724: None,
+983731: None,
+983734: None,
+983729: None,
+983730: None,
+983725: None,
+983726: None,
+983732: None,
+983733: None,
+983727: None,
+983728: None,
+983757: None,
+983764: None,
+983767: None,
+983762: None,
+983763: None,
+983758: None,
+983759: None,
+983765: None,
+983766: None,
+983812: None,
+983819: None,
+983822: None,
+983817: None,
+983818: None,
+983813: None,
+983814: None,
+983820: None,
+983821: None,
+983815: None,
+983816: None,
+983760: None,
+983761: None,
+983867: None,
+983874: None,
+983877: None,
+983872: None,
+983873: None,
+983845: None,
+983852: None,
+983855: None,
+983850: None,
+983851: None,
+983846: None,
+983847: None,
+983853: None,
+983854: None,
+983901: None,
+983848: None,
+983849: None,
+983868: None,
+983869: None,
+983875: None,
+983876: None,
+983856: None,
+983863: None,
+983866: None,
+983861: None,
+983862: None,
+983857: None,
+983858: None,
+983864: None,
+983865: None,
+983859: None,
+983860: None,
+983870: None,
+983871: None,
+983702: None,
+983709: None,
+983712: None,
+983707: None,
+983708: None,
+983703: None,
+983704: None,
+983710: None,
+983711: None,
+983680: None,
+983687: None,
+983690: None,
+983685: None,
+983686: None,
+983681: None,
+983682: None,
+983688: None,
+983689: None,
+983683: None,
+983684: None,
+983705: None,
+983706: None,
+983779: None,
+983786: None,
+983789: None,
+983784: None,
+983785: None,
+983780: None,
+983781: None,
+983787: None,
+983788: None,
+983782: None,
+983783: None,
+983746: None,
+983753: None,
+983756: None,
+983751: None,
+983752: None,
+983747: None,
+983748: None,
+983754: None,
+983755: None,
+983749: None,
+983750: None,
 3061: None,
 3196: None,
 3193: None,
@@ -7624,6 +7633,7 @@
 9928: None,
 3947: None,
 3948: None,
+983047: None,
 4048: None,
 4052: None,
 4051: None,
@@ -8358,6 +8368,7 @@
 11055: None,
 11038: None,
 11825: None,
+983048: None,
 }
 _code_by_name = {
 }
@@ -8844,6 +8855,7 @@
 'BUGINESE VOWEL SIGN I': None,
 'BUGINESE VOWEL SIGN O': None,
 'BUGINESE VOWEL SIGN U': None,
+'BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS': None,
 'CANADIAN SYLLABICS AAY': None,
 'CANADIAN SYLLABICS AY': None,
 'CANADIAN SYLLABICS BEAVER DENE L': None,
@@ -12790,6 +12802,7 @@
 'KAITHI VOWEL SIGN O': None,
 'KAITHI VOWEL SIGN U': None,
 'KAITHI VOWEL SIGN UU': None,
+'KANNADA LETTER LLLA': None,
 'KANNADA SIGN AVAGRAHA': None,
 'KANNADA SIGN JIHVAMULIYA': None,
 'KANNADA SIGN NUKTA': None,
@@ -12994,6 +13007,10 @@
 'KHMER VOWEL SIGN AAM': None,
 'KHMER VOWEL SIGN COENG QA': None,
 'KHMER VOWEL SIGN OM': None,
+'LAO LETTER FO FAY': None,
+'LAO LETTER FO FON': None,
+'LAO LETTER LO': None,
+'LAO LETTER RO': None,
 'LARGE ONE DOT OVER TWO DOTS PUNCTUATION': None,
 'LARGE ONE RING OVER TWO RINGS PUNCTUATION': None,
 'LARGE TWO DOTS OVER ONE DOT PUNCTUATION': None,
@@ -13028,6 +13045,7 @@
 'LATIN CAPITAL LETTER EGYPTOLOGICAL AIN': None,
 'LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF': None,
 'LATIN CAPITAL LETTER ET': None,
+'LATIN CAPITAL LETTER GHA': None,
 'LATIN CAPITAL LETTER GLOTTAL STOP': None,
 'LATIN CAPITAL LETTER H WITH DESCENDER': None,
 'LATIN CAPITAL LETTER HALF H': None,
@@ -13191,6 +13209,7 @@
 'LATIN SMALL LETTER F WITH MIDDLE TILDE': None,
 'LATIN SMALL LETTER F WITH PALATAL HOOK': None,
 'LATIN SMALL LETTER G WITH PALATAL HOOK': None,
+'LATIN SMALL LETTER GHA': None,
 'LATIN SMALL LETTER GLOTTAL STOP': None,
 'LATIN SMALL LETTER H WITH DESCENDER': None,
 'LATIN SMALL LETTER HALF H': None,
@@ -14834,6 +14853,7 @@
 'PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET': None,
 'PRESENTATION FORM FOR VERTICAL QUESTION MARK': None,
 'PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET': None,
+'PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET': None,
 'PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET': None,
 'PRESENTATION FORM FOR VERTICAL SEMICOLON': None,
 'QUINCUNX': None,
@@ -15975,6 +15995,7 @@
 'THUNDER CLOUD AND RAIN': None,
 'TIBETAN LETTER KKA': None,
 'TIBETAN LETTER RRA': None,
+'TIBETAN MARK BKA- SHOG GI MGO RGYAN': None,
 'TIBETAN MARK BSKA- SHOG GI MGO RGYAN': None,
 'TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA': None,
 'TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA': None,
@@ -16709,6 +16730,7 @@
 'WHITE VERTICAL ELLIPSE': None,
 'WHITE VERY SMALL SQUARE': None,
 'WORD SEPARATOR MIDDLE DOT': None,
+'YI SYLLABLE ITERATION MARK': None,
 }
 
 _cjk_prefix = "CJK UNIFIED IDEOGRAPH-"
@@ -21257,8 +21279,19 @@
 
 
 def lookup_named_sequence(code):
-    if 0 <= code - 983296 < len(_named_sequences):
-        return _named_sequences[code - 983296]
+    if 0 <= code - 983552 < len(_named_sequences):
+        return _named_sequences[code - 983552]
     else:
         return None
 
+_name_aliases = [
+]
+
+
+def lookup_with_alias(name):
+    code = lookup(name)
+    if 0 <= code - 983040 < len(_name_aliases):
+        return _name_aliases[code - 983040]
+    else:
+        return code
+
diff --git a/rpython/rlib/unicodedata/unicodedb_5_2_0.py b/rpython/rlib/unicodedata/unicodedb_5_2_0.py
--- a/rpython/rlib/unicodedata/unicodedb_5_2_0.py
+++ b/rpython/rlib/unicodedata/unicodedb_5_2_0.py
@@ -190,6 +190,7 @@
 '\x07 KEFULA'
 '\x08 KEMBANG'
 '\x0e KISIM5 TIMES '
+'\x02 L'
 '\x05 LACA'
 '\x11 LAGAB TIMES ASH2'
 '\x11 LAGAR OVER LAGAR'
@@ -1349,7 +1350,7 @@
 '\x04CHOR'
 '\tCHOSEONG '
 '\x06CHRIVI'
-'\rCHROMA SYNAFI'
+'\x07CHROMA '
 '\rCHRYSANTHEMUM'
 '\x07CHU CAN'
 '\x05CHULA'
@@ -1377,6 +1378,7 @@
 '\x02CK'
 '\x07CK MARK'
 ')CK-TILTED SHADOWED WHITE RIGHTWARDS ARROW'
+'\x04CKET'
 '\x05CKING'
 '\x06CKNESS'
 '\x07CKWISE '
@@ -1932,6 +1934,7 @@
 '\x07ER THAN'
 '\x08ER TRUTH'
 '\x08ERAL URN'
+'\x0cERATION MARK'
 '\x0bERCENT SIGN'
 '\x07ERCIAL '
 '\tERCIAL AT'
@@ -2251,6 +2254,7 @@
 '\x04GHWA'
 '\x02GI'
 '\x07GI GUNU'
+'\x0cGI MGO RGYAN'
 '\x04GIBA'
 '\x06GICAL '
 '\x04GIDA'
@@ -2972,7 +2976,7 @@
 '\nK2 PLUS BU'
 '\x02K4'
 '\x02KA'
-'\x10KA- SHOG YIG MGO'
+'\tKA- SHOG '
 '\x04KAAF'
 '\x03KAB'
 '\tKABA TENU'
@@ -3003,6 +3007,7 @@
 '\x06KAYAH '
 '\x07KAYANNA'
 '\x12KBAR ISOLATED FORM'
+'\x04KCET'
 '\x02KE'
 '\x06KE PHO'
 '\x16KEEPING STILL MOUNTAIN'
@@ -3187,8 +3192,8 @@
 '\nLENDED YUS'
 '\x0bLENGTH MARK'
 '\x05LENIS'
+'\x0eLENTICULAR BRA'
 '\x12LENTICULAR BRACKET'
-'\x12LENTICULAR BRAKCET'
 '\x04LEPH'
 '\x0cLER CONSTANT'
 '\x08LESS SHA'
@@ -3276,7 +3281,6 @@
 '\x0eLMOST EQUAL TO'
 '\x0fLMOST EQUAL TO '
 '\x02LO'
-'\x04LO L'
 '\rLOCATION SIGN'
 '\x08LOCATIVE'
 '\tLOCKWISE '
@@ -5041,6 +5045,7 @@
 '\x0bSYMBOL FOR '
 '\x12SYMMETRIC SWAPPING'
 '\x16SYMPTOTICALLY EQUAL TO'
+'\x06SYNAFI'
 '\x07SYNAGMA'
 '\rSYNDESMOS NEO'
 '\tSYNTHETON'
@@ -5598,6 +5603,7 @@
 '\x13VARIANT WITH SQUARE'
 '\x13VARIATION INDICATOR'
 '\x0bVARYS ICHOS'
+'\x05VASIS'
 '\x13VASTNESS OR WASTING'
 '\x03VAV'
 '\x07VAV YOD'
@@ -5907,6 +5913,7 @@
 '\x06YGISMA'
 '\x02YI'
 '\x08YIDDISH '
+'\x07YIG MGO'
 '\x08YIG MGO '
 '\x0fYIG MGO MDUN MA'
 '\x14YIG MGO PHUR SHAD MA'
@@ -5998,6313 +6005,6313 @@
 '\x05ZYGOS'
 )
 _charnodes =[70758,
- -54016,
+ -54013,
  -1,
  132371,
- 28772,
+ 28800,
  -1,
  197694,
- 78442,
+ 78444,
  -1,
  262727,
- 136032,
+ 136035,
  -1,
  327957,
- 202072,
+ 202075,
  -1,
  393238,
- 282255,
+ 282270,
  -1,
  -65529,
- 347791,
+ 347806,
  195071,
  -65528,
- 409765,
+ 409767,
  195070,
  -65527,
- 472618,
+ 472620,
  195069,
  -65526,
- 535808,
+ 535811,
  195068,
  -65525,
- 599704,
+ 599707,
  195067,
  -65524,
- 660875,
+ 660878,
  195066,
  -65523,
- 726360,
+ 726363,
  195065,
  -65522,
- 791844,
+ 791847,
  195064,
  -65521,
- 857309,
+ 857312,
  195063,
  -65520,
- 922776,
+ 922779,
  195062,
  -65519,
- 988192,
+ 988195,
  195061,
  -65518,
- 1053651,
+ 1053654,
  195060,
  -65517,
- 1119075,
+ 1119078,
  195059,
  -65516,
- 1184446,
+ 1184449,
  195058,
  -65515,
- 1249830,
+ 1249833,
  195057,
  -1,
- 1315238,
+ 1315241,
  195056,
  1507367,
- 344229,
+ 344231,
  -1,
  -65512,
- 1461903,
+ 1461918,
  195055,
  -65511,
- 1523877,
+ 1523879,
  195054,
  -65510,
- 1586730,
+ 1586732,
  195053,
  -65509,
- 1649920,
+ 1649923,
  195052,
  -65508,
- 1713816,
+ 1713819,
  195051,
  -65507,
- 1774987,
+ 1774990,
  195050,
  -65506,
- 1840472,
+ 1840475,
  195049,
  -65505,
- 1905956,
+ 1905959,
  195048,
  -65504,
- 1971421,
+ 1971424,
  195047,
  -65503,
- 2036888,
+ 2036891,
  195046,
  -65502,
- 2102304,
+ 2102307,
  195045,
  -65501,
- 2167763,
+ 2167766,
  195044,
  -65500,
- 2233187,
+ 2233190,
  195043,
  -65499,
- 2298558,
+ 2298561,
  195042,
  -65498,
- 2363942,
+ 2363945,
  195041,
  -1,
- 2429350,
+ 2429353,
  195040,
  2621496,
- 1455658,
+ 1455660,
  -1,
  -65495,
- 2576015,
+ 2576030,
  195039,
  -65494,
- 2637989,
+ 2637991,
  195038,
  -65493,
- 2700842,
+ 2700844,
  195037,
  -65492,
- 2764032,
+ 2764035,
  195036,
  -65491,
- 2827928,
+ 2827931,
  195035,
  -65490,
- 2889099,
+ 2889102,
  195034,
  -65489,
- 2954584,
+ 2954587,
  195033,
  -65488,
- 3020068,
+ 3020071,
  195032,
  -65487,
- 3085533,
+ 3085536,
  195031,
  -65486,
- 3151000,
+ 3151003,
  195030,
  -65485,
- 3216416,
+ 3216419,
  195029,
  -65484,
- 3281875,
+ 3281878,
  195028,
  -65483,
- 3347299,
+ 3347302,
  195027,
  -65482,
- 3412670,
+ 3412673,
  195026,
  -65481,
- 3478054,
+ 3478057,
  195025,
  -1,
- 3543462,
+ 3543465,
  195024,
  3735625,
- 2567424,
+ 2567427,
  -1,
  -65478,
- 3690127,
+ 3690142,
  195023,
  -65477,
- 3752101,
+ 3752103,
  195022,
  -65476,
- 3814954,
+ 3814956,
  195021,
  -65475,
- 3878144,
+ 3878147,
  195020,
  -65474,
- 3942040,
+ 3942043,
  195019,
  -65473,
- 4003211,
+ 4003214,
  195018,
  -65472,
- 4068696,
+ 4068699,
  195017,
  -65471,
- 4134180,
+ 4134183,
  195016,
  -65470,
- 4199645,
+ 4199648,
  195015,
  -65469,
- 4265112,
+ 4265115,
  195014,
  -65468,
- 4330528,
+ 4330531,
  195013,
  -65467,
- 4395987,
+ 4395990,
  195012,
  -65466,
- 4461411,
+ 4461414,
  195011,
  -65465,
- 4526782,
+ 4526785,
  195010,
  -65464,
- 4592166,
+ 4592169,
  195009,
  -1,
- 4657574,
+ 4657577,
  195008,
  4849754,
- 3679896,
+ 3679899,
  -1,
  -65461,
- 4804239,
+ 4804254,
  195007,
  -65460,
- 4866213,
+ 4866215,
  195006,
  -65459,
- 4929066,
+ 4929068,
  195005,
  -65458,
- 4992256,
+ 4992259,
  195004,
  -65457,
- 5056152,
+ 5056155,
  195003,
  -65456,
- 5117323,
+ 5117326,
  195002,
  -65455,
- 5182808,
+ 5182811,
  195001,
  -65454,
- 5248292,
+ 5248295,
  195000,
  -65453,
- 5313757,
+ 5313760,
  194999,
  -65452,
- 5379224,
+ 5379227,
  194998,
  -65451,
- 5444640,
+ 5444643,
  194997,
  -65450,
- 5510099,
+ 5510102,
  194996,
  -65449,
- 5575523,
+ 5575526,
  194995,
  -65448,
- 5640894,
+ 5640897,
  194994,
  -65447,
- 5706278,
+ 5706281,
  194993,
  -1,
- 5771686,
+ 5771689,
  194992,
  5963883,
- 4789643,
+ 4789646,
  -1,
  -65444,
- 5918351,
+ 5918366,
  194991,
  -65443,
- 5980325,
+ 5980327,
  194990,
  -65442,
- 6043178,
+ 6043180,
  194989,
  -65441,
- 6106368,
+ 6106371,
  194988,
  -65440,
- 6170264,
+ 6170267,
  194987,
  -65439,
- 6231435,
+ 6231438,
  194986,
  -65438,
- 6296920,
+ 6296923,
  194985,
  -65437,
- 6362404,
+ 6362407,
  194984,
  -65436,
- 6427869,
+ 6427872,
  194983,
  -65435,
- 6493336,
+ 6493339,
  194982,
  -65434,
- 6558752,
+ 6558755,
  194981,
  -65433,
- 6624211,
+ 6624214,
  194980,
  -65432,
- 6689635,
+ 6689638,
  194979,
  -65431,
- 6755006,
+ 6755009,
  194978,
  -65430,
- 6820390,
+ 6820393,
  194977,
  -1,
- 6885798,
+ 6885801,
  194976,
  7078012,
- 5903704,
+ 5903707,
  -1,
  -65427,
- 7032463,
+ 7032478,
  194975,
  -65426,
- 7094437,
+ 7094439,
  194974,
  -65425,
- 7157290,
+ 7157292,
  194973,
  -65424,
- 7220480,
+ 7220483,
  194972,
  -65423,
- 7284376,
+ 7284379,
  194971,
  -65422,
- 7345547,
+ 7345550,
  194970,
  -65421,
- 7411032,
+ 7411035,
  194969,
  -65420,
- 7476516,
+ 7476519,
  194968,
  -65419,
- 7541981,
+ 7541984,
  194967,
  -65418,
- 7607448,
+ 7607451,
  194966,
  -65417,
- 7672864,
+ 7672867,
  194965,
  -65416,
- 7738323,
+ 7738326,
  194964,
  -65415,
- 7803747,
+ 7803750,
  194963,
  -65414,
- 7869118,
+ 7869121,
  194962,
  -65413,
- 7934502,
+ 7934505,
  194961,
  -1,
- 7999910,
+ 7999913,
  194960,
  8192141,
- 7017764,
+ 7017767,
  -1,
  -65410,
- 8146575,
+ 8146590,
  194959,
  -65409,
- 8208549,
+ 8208551,
  194958,
  -65408,
- 8271402,
+ 8271404,
  194957,
  -65407,
- 8334592,
+ 8334595,
  194956,
  -65406,
- 8398488,
+ 8398491,
  194955,
  -65405,
- 8459659,
+ 8459662,
  194954,
  -65404,
- 8525144,
+ 8525147,
  194953,
  -65403,
- 8590628,
+ 8590631,
  194952,
  -65402,
- 8656093,
+ 8656096,
  194951,
  -65401,
- 8721560,
+ 8721563,
  194950,
  -65400,
- 8786976,
+ 8786979,
  194949,
  -65399,
- 8852435,
+ 8852438,
  194948,
  -65398,
- 8917859,
+ 8917862,
  194947,
  -65397,
- 8983230,
+ 8983233,
  194946,
  -65396,
- 9048614,
+ 9048617,
  194945,
  -1,
- 9114022,
+ 9114025,
  194944,
  9306270,
- 8131805,
+ 8131808,
  -1,
  -65393,
- 9260687,
+ 9260702,
  194943,
  -65392,
- 9322661,
+ 9322663,
  194942,
  -65391,
- 9385514,
+ 9385516,
  194941,
  -65390,
- 9448704,
+ 9448707,
  194940,
  -65389,
- 9512600,
+ 9512603,
  194939,
  -65388,
- 9573771,
+ 9573774,
  194938,
  -65387,
- 9639256,
+ 9639259,
  194937,
  -65386,
- 9704740,
+ 9704743,
  194936,
  -65385,
- 9770205,
+ 9770208,
  194935,
  -65384,
- 9835672,
+ 9835675,
  194934,
  -65383,
- 9901088,
+ 9901091,
  194933,
  -65382,
- 9966547,
+ 9966550,
  194932,
  -65381,
- 10031971,
+ 10031974,
  194931,
  -65380,
- 10097342,
+ 10097345,
  194930,
  -65379,
- 10162726,
+ 10162729,
  194929,
  -1,
- 10228134,
+ 10228137,
  194928,
  10420399,
- 9245848,
+ 9245851,
  -1,
  -65376,
- 10374799,
+ 10374814,
  194927,
  -65375,
- 10436773,
+ 10436775,
  194926,
  -65374,
- 10499626,
+ 10499628,
  194925,
  -65373,
- 10562816,
+ 10562819,
  194924,
  -65372,
- 10626712,
+ 10626715,
  194923,
  -65371,
- 10687883,
+ 10687886,
  194922,
  -65370,
- 10753368,
+ 10753371,
  194921,
  -65369,
- 10818852,
+ 10818855,
  194920,
  -65368,
- 10884317,
+ 10884320,
  194919,
  -65367,
- 10949784,
+ 10949787,
  194918,
  -65366,
- 11015200,
+ 11015203,


More information about the pypy-commit mailing list