[pypy-commit] pypy py3.3: Add support for "named sequences" in unicode database

amauryfa noreply at buildbot.pypy.org
Mon Mar 16 01:16:21 CET 2015


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3.3
Changeset: r76390:81b82168c663
Date: 2015-03-13 18:56 +0100
http://bitbucket.org/pypy/pypy/changeset/81b82168c663/

Log:	Add support for "named sequences" in unicode database

diff too long, truncating to 2000 out of 176971 lines

diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -76,6 +76,7 @@
 class UCD(W_Root):
     def __init__(self, unicodedb):
         self._lookup = unicodedb.lookup
+        self._lookup_named_sequence = unicodedb.lookup_named_sequence
         self._name = unicodedb.name
         self._decimal = unicodedb.decimal
         self._digit = unicodedb.digit
@@ -108,6 +109,13 @@
         except KeyError:
             msg = space.mod(space.wrap("undefined character name '%s'"), space.wrap(name))
             raise OperationError(space.w_KeyError, msg)
+
+        # The code may be a named sequence
+        sequence = self._lookup_named_sequence(code)
+        if sequence is not None:
+            # Named sequences contain only UCS2 code points, so no surrogate handling is needed here.
+            return space.wrap(sequence)
+
         return space.wrap(code_to_unichr(code))
 
     def name(self, space, w_unichr, w_default=None):
diff --git a/pypy/module/unicodedata/test/test_unicodedata.py b/pypy/module/unicodedata/test/test_unicodedata.py
--- a/pypy/module/unicodedata/test/test_unicodedata.py
+++ b/pypy/module/unicodedata/test/test_unicodedata.py
@@ -106,3 +106,16 @@
     def test_bidirectional(self):
         import unicodedata
         raises(TypeError, unicodedata.bidirectional, 'xx')
+
+    def test_named_sequences(self):
+        import unicodedata
+        sequences = [
+            ('LATIN SMALL LETTER R WITH TILDE', '\u0072\u0303'),
+            ('TAMIL SYLLABLE SAI', '\u0BB8\u0BC8'),
+            ('TAMIL SYLLABLE MOO', '\u0BAE\u0BCB'),
+            ('TAMIL SYLLABLE NNOO', '\u0BA3\u0BCB'),
+            ('TAMIL CONSONANT KSS', '\u0B95\u0BCD\u0BB7\u0BCD'),
+        ]
+        for seqname, codepoints in sequences:
+            assert unicodedata.lookup(seqname) == codepoints
+
diff --git a/rpython/rlib/unicodedata/NameAliases-3.2.0.txt b/rpython/rlib/unicodedata/NameAliases-3.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NameAliases-3.2.0.txt
@@ -0,0 +1,1 @@
+# NameAliases.txt does not exist for Unicode 3.2.0; this file is an intentionally empty placeholder.
diff --git a/rpython/rlib/unicodedata/NameAliases-5.2.0.txt b/rpython/rlib/unicodedata/NameAliases-5.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NameAliases-5.2.0.txt
@@ -0,0 +1,40 @@
+# NameAliases-5.2.0.txt
+# Date: 2009-05-22, 13:05:00 PDT [KW]
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# Copyright (c) 2005-2009 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# This file defines the formal name aliases for Unicode characters.
+#
+# For informative aliases see NamesList.txt
+#
+# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/
+#
+# FORMAT
+#
+# Each line has two fields
+# First field: Code point
+# Second field: Alias
+#
+# In case multiple aliases are assigned, additional aliases
+# would be provided on separate lines
+#
+#-----------------------------------------------------------------
+01A2;LATIN CAPITAL LETTER GHA
+01A3;LATIN SMALL LETTER GHA
+0CDE;KANNADA LETTER LLLA
+0E9D;LAO LETTER FO FON
+0E9F;LAO LETTER FO FAY
+0EA3;LAO LETTER RO
+0EA5;LAO LETTER LO
+0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN
+A015;YI SYLLABLE ITERATION MARK
+FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET
+1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS
+
+# Total code points: 11
+
+# EOF
diff --git a/rpython/rlib/unicodedata/NameAliases-6.0.0.txt b/rpython/rlib/unicodedata/NameAliases-6.0.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NameAliases-6.0.0.txt
@@ -0,0 +1,40 @@
+# NameAliases-6.0.0.txt
+# Date: 2010-05-10, 11:58:00 PDT [KW]
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# Copyright (c) 2005-2010 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# This file defines the formal name aliases for Unicode characters.
+#
+# For informative aliases see NamesList.txt
+#
+# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/
+#
+# FORMAT
+#
+# Each line has two fields
+# First field: Code point
+# Second field: Alias
+#
+# In case multiple aliases are assigned, additional aliases
+# would be provided on separate lines
+#
+#-----------------------------------------------------------------
+01A2;LATIN CAPITAL LETTER GHA
+01A3;LATIN SMALL LETTER GHA
+0CDE;KANNADA LETTER LLLA
+0E9D;LAO LETTER FO FON
+0E9F;LAO LETTER FO FAY
+0EA3;LAO LETTER RO
+0EA5;LAO LETTER LO
+0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN
+A015;YI SYLLABLE ITERATION MARK
+FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET
+1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS
+
+# Total code points: 11
+
+# EOF
diff --git a/rpython/rlib/unicodedata/NameAliases-6.2.0.txt b/rpython/rlib/unicodedata/NameAliases-6.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NameAliases-6.2.0.txt
@@ -0,0 +1,509 @@
+# NameAliases-6.2.0.txt
+# Date: 2012-05-15, 18:44:00 GMT [KW]
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# Copyright (c) 2005-2012 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# This file defines the formal name aliases for Unicode characters.
+#
+# For informative aliases see NamesList.txt
+#
+# The formal name aliases are divided into five types.
+#
+# 1. Corrections for serious problems in the character names
+# 2. ISO 6429 names for C0 and C1 control functions, and other
+#    commonly occurring names for control codes
+# 3. A few widely used alternate names for format characters
+# 4. Several documented labels for C1 control code points which
+#    were never actually approved in any standard
+# 5. Commonly occurring abbreviations (or acronyms) for control codes,
+#    format characters, spaces, and variation selectors
+#
+# The formal name aliases are part of the Unicode character namespace, which
+# includes the character names and the names of named character sequences.
+# The inclusion of ISO 6429 names and other commonly occurring names and
+# abbreviations for control codes and format characters as formal name aliases
+# is to help avoid name collisions between Unicode character names and the 
+# labels which commonly appear in text and/or in implementations such as regex, for
+# control codes (which have no Unicode character name) or for format characters.
+#
+# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/
+#
+# FORMAT
+#
+# Each line has three fields, as described here:
+#
+# First field:  Code point
+# Second field: Alias
+# Third field:  Type
+#
+# The Type labels used are: correction, control, alternate, figment, abbreviation
+#
+# Those Type labels can be mapped to other strings for display, if desired.
+#
+# In case multiple aliases are assigned, additional aliases
+# are provided on separate lines. Parsers of this data file should
+# take note that the same code point can (and does) occur more than once.
+#
+#-----------------------------------------------------------------
+
+0000;NULL;control
+0000;NUL;abbreviation
+0001;START OF HEADING;control
+0001;SOH;abbreviation
+0002;START OF TEXT;control
+0002;STX;abbreviation
+0003;END OF TEXT;control
+0003;ETX;abbreviation
+0004;END OF TRANSMISSION;control
+0004;EOT;abbreviation
+0005;ENQUIRY;control
+0005;ENQ;abbreviation
+0006;ACKNOWLEDGE;control
+0006;ACK;abbreviation
+
+# Note that no formal name alias for the ISO 6429 "BELL" is
+# provided for U+0007, because of the existing name collision
+# with U+1F514 BELL.
+
+0007;ALERT;control
+0007;BEL;abbreviation
+0008;BACKSPACE;control
+0008;BS;abbreviation
+0009;CHARACTER TABULATION;control
+0009;HORIZONTAL TABULATION;control
+0009;HT;abbreviation
+0009;TAB;abbreviation
+000A;LINE FEED;control
+000A;NEW LINE;control
+000A;END OF LINE;control
+000A;LF;abbreviation
+000A;NL;abbreviation
+000A;EOL;abbreviation
+000B;LINE TABULATION;control
+000B;VERTICAL TABULATION;control
+000B;VT;abbreviation
+000C;FORM FEED;control
+000C;FF;abbreviation
+000D;CARRIAGE RETURN;control
+000D;CR;abbreviation
+000E;SHIFT OUT;control
+000E;LOCKING-SHIFT ONE;control
+000E;SO;abbreviation
+000F;SHIFT IN;control
+000F;LOCKING-SHIFT ZERO;control
+000F;SI;abbreviation
+0010;DATA LINK ESCAPE;control
+0010;DLE;abbreviation
+0011;DEVICE CONTROL ONE;control
+0011;DC1;abbreviation
+0012;DEVICE CONTROL TWO;control
+0012;DC2;abbreviation
+0013;DEVICE CONTROL THREE;control
+0013;DC3;abbreviation
+0014;DEVICE CONTROL FOUR;control
+0014;DC4;abbreviation
+0015;NEGATIVE ACKNOWLEDGE;control
+0015;NAK;abbreviation
+0016;SYNCHRONOUS IDLE;control
+0016;SYN;abbreviation
+0017;END OF TRANSMISSION BLOCK;control
+0017;ETB;abbreviation
+0018;CANCEL;control
+0018;CAN;abbreviation
+0019;END OF MEDIUM;control
+0019;EOM;abbreviation
+001A;SUBSTITUTE;control
+001A;SUB;abbreviation
+001B;ESCAPE;control
+001B;ESC;abbreviation
+001C;INFORMATION SEPARATOR FOUR;control
+001C;FILE SEPARATOR;control
+001C;FS;abbreviation
+001D;INFORMATION SEPARATOR THREE;control
+001D;GROUP SEPARATOR;control
+001D;GS;abbreviation
+001E;INFORMATION SEPARATOR TWO;control
+001E;RECORD SEPARATOR;control
+001E;RS;abbreviation
+001F;INFORMATION SEPARATOR ONE;control
+001F;UNIT SEPARATOR;control
+001F;US;abbreviation
+0020;SP;abbreviation
+007F;DELETE;control
+007F;DEL;abbreviation
+0080;PADDING CHARACTER;figment
+0080;PAD;abbreviation
+0081;HIGH OCTET PRESET;figment
+0081;HOP;abbreviation
+0082;BREAK PERMITTED HERE;control
+0082;BPH;abbreviation
+0083;NO BREAK HERE;control
+0083;NBH;abbreviation
+0084;INDEX;control
+0084;IND;abbreviation
+0085;NEXT LINE;control
+0085;NEL;abbreviation
+0086;START OF SELECTED AREA;control
+0086;SSA;abbreviation
+0087;END OF SELECTED AREA;control
+0087;ESA;abbreviation
+0088;CHARACTER TABULATION SET;control
+0088;HORIZONTAL TABULATION SET;control
+0088;HTS;abbreviation
+0089;CHARACTER TABULATION WITH JUSTIFICATION;control
+0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
+0089;HTJ;abbreviation
+008A;LINE TABULATION SET;control
+008A;VERTICAL TABULATION SET;control
+008A;VTS;abbreviation
+008B;PARTIAL LINE FORWARD;control
+008B;PARTIAL LINE DOWN;control
+008B;PLD;abbreviation
+008C;PARTIAL LINE BACKWARD;control
+008C;PARTIAL LINE UP;control
+008C;PLU;abbreviation
+008D;REVERSE LINE FEED;control
+008D;REVERSE INDEX;control
+008D;RI;abbreviation
+008E;SINGLE SHIFT TWO;control
+008E;SINGLE-SHIFT-2;control
+008E;SS2;abbreviation
+008F;SINGLE SHIFT THREE;control
+008F;SINGLE-SHIFT-3;control
+008F;SS3;abbreviation
+0090;DEVICE CONTROL STRING;control
+0090;DCS;abbreviation
+0091;PRIVATE USE ONE;control
+0091;PRIVATE USE-1;control
+0091;PU1;abbreviation
+0092;PRIVATE USE TWO;control
+0092;PRIVATE USE-2;control
+0092;PU2;abbreviation
+0093;SET TRANSMIT STATE;control
+0093;STS;abbreviation
+0094;CANCEL CHARACTER;control
+0094;CCH;abbreviation
+0095;MESSAGE WAITING;control
+0095;MW;abbreviation
+0096;START OF GUARDED AREA;control
+0096;START OF PROTECTED AREA;control
+0096;SPA;abbreviation
+0097;END OF GUARDED AREA;control
+0097;END OF PROTECTED AREA;control
+0097;EPA;abbreviation
+0098;START OF STRING;control
+0098;SOS;abbreviation
+0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
+0099;SGC;abbreviation
+009A;SINGLE CHARACTER INTRODUCER;control
+009A;SCI;abbreviation
+009B;CONTROL SEQUENCE INTRODUCER;control
+009B;CSI;abbreviation
+009C;STRING TERMINATOR;control
+009C;ST;abbreviation
+009D;OPERATING SYSTEM COMMAND;control
+009D;OSC;abbreviation
+009E;PRIVACY MESSAGE;control
+009E;PM;abbreviation
+009F;APPLICATION PROGRAM COMMAND;control
+009F;APC;abbreviation
+00A0;NBSP;abbreviation
+00AD;SHY;abbreviation
+01A2;LATIN CAPITAL LETTER GHA;correction
+01A3;LATIN SMALL LETTER GHA;correction
+034F;CGJ;abbreviation
+0709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction
+0CDE;KANNADA LETTER LLLA;correction
+0E9D;LAO LETTER FO FON;correction
+0E9F;LAO LETTER FO FAY;correction
+0EA3;LAO LETTER RO;correction
+0EA5;LAO LETTER LO;correction
+0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction
+180B;FVS1;abbreviation
+180C;FVS2;abbreviation
+180D;FVS3;abbreviation
+180E;MVS;abbreviation
+200B;ZWSP;abbreviation
+200C;ZWNJ;abbreviation
+200D;ZWJ;abbreviation
+200E;LRM;abbreviation
+200F;RLM;abbreviation
+202A;LRE;abbreviation
+202B;RLE;abbreviation
+202C;PDF;abbreviation
+202D;LRO;abbreviation
+202E;RLO;abbreviation
+202F;NNBSP;abbreviation
+205F;MMSP;abbreviation
+2060;WJ;abbreviation
+2118;WEIERSTRASS ELLIPTIC FUNCTION;correction
+2448;MICR ON US SYMBOL;correction
+2449;MICR DASH SYMBOL;correction
+A015;YI SYLLABLE ITERATION MARK;correction
+FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction
+FE00;VS1;abbreviation
+FE01;VS2;abbreviation
+FE02;VS3;abbreviation
+FE03;VS4;abbreviation
+FE04;VS5;abbreviation
+FE05;VS6;abbreviation
+FE06;VS7;abbreviation
+FE07;VS8;abbreviation
+FE08;VS9;abbreviation
+FE09;VS10;abbreviation
+FE0A;VS11;abbreviation
+FE0B;VS12;abbreviation
+FE0C;VS13;abbreviation
+FE0D;VS14;abbreviation
+FE0E;VS15;abbreviation
+FE0F;VS16;abbreviation
+FEFF;BYTE ORDER MARK;alternate
+FEFF;BOM;abbreviation
+FEFF;ZWNBSP;abbreviation
+1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction
+E0100;VS17;abbreviation
+E0101;VS18;abbreviation
+E0102;VS19;abbreviation
+E0103;VS20;abbreviation
+E0104;VS21;abbreviation
+E0105;VS22;abbreviation
+E0106;VS23;abbreviation
+E0107;VS24;abbreviation
+E0108;VS25;abbreviation
+E0109;VS26;abbreviation
+E010A;VS27;abbreviation
+E010B;VS28;abbreviation
+E010C;VS29;abbreviation
+E010D;VS30;abbreviation
+E010E;VS31;abbreviation
+E010F;VS32;abbreviation
+E0110;VS33;abbreviation
+E0111;VS34;abbreviation
+E0112;VS35;abbreviation
+E0113;VS36;abbreviation
+E0114;VS37;abbreviation
+E0115;VS38;abbreviation
+E0116;VS39;abbreviation
+E0117;VS40;abbreviation
+E0118;VS41;abbreviation
+E0119;VS42;abbreviation
+E011A;VS43;abbreviation
+E011B;VS44;abbreviation
+E011C;VS45;abbreviation
+E011D;VS46;abbreviation
+E011E;VS47;abbreviation
+E011F;VS48;abbreviation
+E0120;VS49;abbreviation
+E0121;VS50;abbreviation
+E0122;VS51;abbreviation
+E0123;VS52;abbreviation
+E0124;VS53;abbreviation
+E0125;VS54;abbreviation
+E0126;VS55;abbreviation
+E0127;VS56;abbreviation
+E0128;VS57;abbreviation
+E0129;VS58;abbreviation
+E012A;VS59;abbreviation
+E012B;VS60;abbreviation
+E012C;VS61;abbreviation
+E012D;VS62;abbreviation
+E012E;VS63;abbreviation
+E012F;VS64;abbreviation
+E0130;VS65;abbreviation
+E0131;VS66;abbreviation
+E0132;VS67;abbreviation
+E0133;VS68;abbreviation
+E0134;VS69;abbreviation
+E0135;VS70;abbreviation
+E0136;VS71;abbreviation
+E0137;VS72;abbreviation
+E0138;VS73;abbreviation
+E0139;VS74;abbreviation
+E013A;VS75;abbreviation
+E013B;VS76;abbreviation
+E013C;VS77;abbreviation
+E013D;VS78;abbreviation
+E013E;VS79;abbreviation
+E013F;VS80;abbreviation
+E0140;VS81;abbreviation
+E0141;VS82;abbreviation
+E0142;VS83;abbreviation
+E0143;VS84;abbreviation
+E0144;VS85;abbreviation
+E0145;VS86;abbreviation
+E0146;VS87;abbreviation
+E0147;VS88;abbreviation
+E0148;VS89;abbreviation
+E0149;VS90;abbreviation
+E014A;VS91;abbreviation
+E014B;VS92;abbreviation
+E014C;VS93;abbreviation
+E014D;VS94;abbreviation
+E014E;VS95;abbreviation
+E014F;VS96;abbreviation
+E0150;VS97;abbreviation
+E0151;VS98;abbreviation
+E0152;VS99;abbreviation
+E0153;VS100;abbreviation
+E0154;VS101;abbreviation
+E0155;VS102;abbreviation
+E0156;VS103;abbreviation
+E0157;VS104;abbreviation
+E0158;VS105;abbreviation
+E0159;VS106;abbreviation
+E015A;VS107;abbreviation
+E015B;VS108;abbreviation
+E015C;VS109;abbreviation
+E015D;VS110;abbreviation
+E015E;VS111;abbreviation
+E015F;VS112;abbreviation
+E0160;VS113;abbreviation
+E0161;VS114;abbreviation
+E0162;VS115;abbreviation
+E0163;VS116;abbreviation
+E0164;VS117;abbreviation
+E0165;VS118;abbreviation
+E0166;VS119;abbreviation
+E0167;VS120;abbreviation
+E0168;VS121;abbreviation
+E0169;VS122;abbreviation
+E016A;VS123;abbreviation
+E016B;VS124;abbreviation
+E016C;VS125;abbreviation
+E016D;VS126;abbreviation
+E016E;VS127;abbreviation
+E016F;VS128;abbreviation
+E0170;VS129;abbreviation
+E0171;VS130;abbreviation
+E0172;VS131;abbreviation
+E0173;VS132;abbreviation
+E0174;VS133;abbreviation
+E0175;VS134;abbreviation
+E0176;VS135;abbreviation
+E0177;VS136;abbreviation
+E0178;VS137;abbreviation
+E0179;VS138;abbreviation
+E017A;VS139;abbreviation
+E017B;VS140;abbreviation
+E017C;VS141;abbreviation
+E017D;VS142;abbreviation
+E017E;VS143;abbreviation
+E017F;VS144;abbreviation
+E0180;VS145;abbreviation
+E0181;VS146;abbreviation
+E0182;VS147;abbreviation
+E0183;VS148;abbreviation
+E0184;VS149;abbreviation
+E0185;VS150;abbreviation
+E0186;VS151;abbreviation
+E0187;VS152;abbreviation
+E0188;VS153;abbreviation
+E0189;VS154;abbreviation
+E018A;VS155;abbreviation
+E018B;VS156;abbreviation
+E018C;VS157;abbreviation
+E018D;VS158;abbreviation
+E018E;VS159;abbreviation
+E018F;VS160;abbreviation
+E0190;VS161;abbreviation
+E0191;VS162;abbreviation
+E0192;VS163;abbreviation
+E0193;VS164;abbreviation
+E0194;VS165;abbreviation
+E0195;VS166;abbreviation
+E0196;VS167;abbreviation
+E0197;VS168;abbreviation
+E0198;VS169;abbreviation
+E0199;VS170;abbreviation
+E019A;VS171;abbreviation
+E019B;VS172;abbreviation
+E019C;VS173;abbreviation
+E019D;VS174;abbreviation
+E019E;VS175;abbreviation
+E019F;VS176;abbreviation
+E01A0;VS177;abbreviation
+E01A1;VS178;abbreviation
+E01A2;VS179;abbreviation
+E01A3;VS180;abbreviation
+E01A4;VS181;abbreviation
+E01A5;VS182;abbreviation
+E01A6;VS183;abbreviation
+E01A7;VS184;abbreviation
+E01A8;VS185;abbreviation
+E01A9;VS186;abbreviation
+E01AA;VS187;abbreviation
+E01AB;VS188;abbreviation
+E01AC;VS189;abbreviation
+E01AD;VS190;abbreviation
+E01AE;VS191;abbreviation
+E01AF;VS192;abbreviation
+E01B0;VS193;abbreviation
+E01B1;VS194;abbreviation
+E01B2;VS195;abbreviation
+E01B3;VS196;abbreviation
+E01B4;VS197;abbreviation
+E01B5;VS198;abbreviation
+E01B6;VS199;abbreviation
+E01B7;VS200;abbreviation
+E01B8;VS201;abbreviation
+E01B9;VS202;abbreviation
+E01BA;VS203;abbreviation
+E01BB;VS204;abbreviation
+E01BC;VS205;abbreviation
+E01BD;VS206;abbreviation
+E01BE;VS207;abbreviation
+E01BF;VS208;abbreviation
+E01C0;VS209;abbreviation
+E01C1;VS210;abbreviation
+E01C2;VS211;abbreviation
+E01C3;VS212;abbreviation
+E01C4;VS213;abbreviation
+E01C5;VS214;abbreviation
+E01C6;VS215;abbreviation
+E01C7;VS216;abbreviation
+E01C8;VS217;abbreviation
+E01C9;VS218;abbreviation
+E01CA;VS219;abbreviation
+E01CB;VS220;abbreviation
+E01CC;VS221;abbreviation
+E01CD;VS222;abbreviation
+E01CE;VS223;abbreviation
+E01CF;VS224;abbreviation
+E01D0;VS225;abbreviation
+E01D1;VS226;abbreviation
+E01D2;VS227;abbreviation
+E01D3;VS228;abbreviation
+E01D4;VS229;abbreviation
+E01D5;VS230;abbreviation
+E01D6;VS231;abbreviation
+E01D7;VS232;abbreviation
+E01D8;VS233;abbreviation
+E01D9;VS234;abbreviation
+E01DA;VS235;abbreviation
+E01DB;VS236;abbreviation
+E01DC;VS237;abbreviation
+E01DD;VS238;abbreviation
+E01DE;VS239;abbreviation
+E01DF;VS240;abbreviation
+E01E0;VS241;abbreviation
+E01E1;VS242;abbreviation
+E01E2;VS243;abbreviation
+E01E3;VS244;abbreviation
+E01E4;VS245;abbreviation
+E01E5;VS246;abbreviation
+E01E6;VS247;abbreviation
+E01E7;VS248;abbreviation
+E01E8;VS249;abbreviation
+E01E9;VS250;abbreviation
+E01EA;VS251;abbreviation
+E01EB;VS252;abbreviation
+E01EC;VS253;abbreviation
+E01ED;VS254;abbreviation
+E01EE;VS255;abbreviation
+E01EF;VS256;abbreviation
+
+# EOF
diff --git a/rpython/rlib/unicodedata/NamedSequences-3.2.0.txt b/rpython/rlib/unicodedata/NamedSequences-3.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NamedSequences-3.2.0.txt
@@ -0,0 +1,1 @@
+# NamedSequences.txt does not exist for Unicode 3.2.0; this file is an intentionally empty placeholder.
diff --git a/rpython/rlib/unicodedata/NamedSequences-5.2.0.txt b/rpython/rlib/unicodedata/NamedSequences-5.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NamedSequences-5.2.0.txt
@@ -0,0 +1,448 @@
+# NamedSequences-5.2.0.txt
+# Date: 2009-09-14, 12:44:00 PDT [KW]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2009 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Format:
+# Name of Sequence; Code Point Sequence for USI
+#
+# Note: The order of entries in this file is not significant.
+# However, entries are generally in script order corresponding
+# to block order in the Unicode Standard, to make it easier
+# to find entries in the list.
+
+# ================================================
+
+LATIN CAPITAL LETTER A WITH MACRON AND GRAVE;0100 0300
+LATIN SMALL LETTER A WITH MACRON AND GRAVE;0101 0300
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW;0045 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW;0065 0329
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00C8 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00E8 0329
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00C9 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00E9 0329
+LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON;00CA 0304
+LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON;00EA 0304
+LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON;00CA 030C
+LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON;00EA 030C
+LATIN CAPITAL LETTER I WITH MACRON AND GRAVE;012A 0300
+LATIN SMALL LETTER I WITH MACRON AND GRAVE;012B 0300
+LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE;0069 0307 0301
+LATIN SMALL LETTER NG WITH TILDE ABOVE;006E 0360 0067
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW;004F 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW;006F 0329
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00D2 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00F2 0329
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00D3 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00F3 0329
+LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW;0053 0329
+LATIN SMALL LETTER S WITH VERTICAL LINE BELOW;0073 0329
+LATIN CAPITAL LETTER U WITH MACRON AND GRAVE;016A 0300
+LATIN SMALL LETTER U WITH MACRON AND GRAVE;016B 0300
+
+# Additions for Lithuanian. Provisional 2006-05-18, Approved 2007-10-19
+
+LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301
+LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301
+LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303
+LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303
+LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301
+LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301
+LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303
+LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303
+LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301
+LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301
+LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303
+LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303
+LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300
+LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303
+LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301
+LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301
+LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303
+LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303
+LATIN CAPITAL LETTER J WITH TILDE;004A 0303
+LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303
+LATIN CAPITAL LETTER L WITH TILDE;004C 0303
+LATIN SMALL LETTER L WITH TILDE;006C 0303
+LATIN CAPITAL LETTER M WITH TILDE;004D 0303
+LATIN SMALL LETTER M WITH TILDE;006D 0303
+LATIN CAPITAL LETTER R WITH TILDE;0052 0303
+LATIN SMALL LETTER R WITH TILDE;0072 0303
+LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301
+LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301
+LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303
+LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303
+LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301
+LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301
+LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303
+LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303
+
+# Additions for Tamil. Provisional 2008-02-08, Approved 2009-08-14
+#
+# A visual display of the Tamil named sequences is available
+# in the documentation for Unicode 5.2. See:
+# http://www.unicode.org/versions/Unicode5.2.0/
+
+TAMIL CONSONANT K;  0B95 0BCD
+TAMIL CONSONANT NG; 0B99 0BCD
+TAMIL CONSONANT C;  0B9A 0BCD
+TAMIL CONSONANT NY; 0B9E 0BCD
+TAMIL CONSONANT TT; 0B9F 0BCD
+TAMIL CONSONANT NN; 0BA3 0BCD
+TAMIL CONSONANT T;  0BA4 0BCD
+TAMIL CONSONANT N;  0BA8 0BCD
+TAMIL CONSONANT P;  0BAA 0BCD
+TAMIL CONSONANT M;  0BAE 0BCD
+TAMIL CONSONANT Y;  0BAF 0BCD
+TAMIL CONSONANT R;  0BB0 0BCD
+TAMIL CONSONANT L;  0BB2 0BCD
+TAMIL CONSONANT V;  0BB5 0BCD
+TAMIL CONSONANT LLL;0BB4 0BCD
+TAMIL CONSONANT LL; 0BB3 0BCD
+TAMIL CONSONANT RR; 0BB1 0BCD
+TAMIL CONSONANT NNN;0BA9 0BCD
+TAMIL CONSONANT J;  0B9C 0BCD
+TAMIL CONSONANT SH; 0BB6 0BCD
+TAMIL CONSONANT SS; 0BB7 0BCD
+TAMIL CONSONANT S;  0BB8 0BCD
+TAMIL CONSONANT H;  0BB9 0BCD
+TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD
+
+TAMIL SYLLABLE KAA; 0B95 0BBE
+TAMIL SYLLABLE KI;  0B95 0BBF
+TAMIL SYLLABLE KII; 0B95 0BC0
+TAMIL SYLLABLE KU;  0B95 0BC1
+TAMIL SYLLABLE KUU; 0B95 0BC2
+TAMIL SYLLABLE KE;  0B95 0BC6
+TAMIL SYLLABLE KEE; 0B95 0BC7
+TAMIL SYLLABLE KAI; 0B95 0BC8
+TAMIL SYLLABLE KO;  0B95 0BCA
+TAMIL SYLLABLE KOO; 0B95 0BCB
+TAMIL SYLLABLE KAU; 0B95 0BCC
+
+TAMIL SYLLABLE NGAA; 0B99 0BBE
+TAMIL SYLLABLE NGI;  0B99 0BBF
+TAMIL SYLLABLE NGII; 0B99 0BC0
+TAMIL SYLLABLE NGU;  0B99 0BC1
+TAMIL SYLLABLE NGUU; 0B99 0BC2
+TAMIL SYLLABLE NGE;  0B99 0BC6
+TAMIL SYLLABLE NGEE; 0B99 0BC7
+TAMIL SYLLABLE NGAI; 0B99 0BC8
+TAMIL SYLLABLE NGO;  0B99 0BCA
+TAMIL SYLLABLE NGOO; 0B99 0BCB
+TAMIL SYLLABLE NGAU; 0B99 0BCC
+
+TAMIL SYLLABLE CAA; 0B9A 0BBE
+TAMIL SYLLABLE CI;  0B9A 0BBF
+TAMIL SYLLABLE CII; 0B9A 0BC0
+TAMIL SYLLABLE CU;  0B9A 0BC1
+TAMIL SYLLABLE CUU; 0B9A 0BC2
+TAMIL SYLLABLE CE;  0B9A 0BC6
+TAMIL SYLLABLE CEE; 0B9A 0BC7
+TAMIL SYLLABLE CAI; 0B9A 0BC8
+TAMIL SYLLABLE CO;  0B9A 0BCA
+TAMIL SYLLABLE COO; 0B9A 0BCB
+TAMIL SYLLABLE CAU; 0B9A 0BCC
+
+TAMIL SYLLABLE NYAA; 0B9E 0BBE
+TAMIL SYLLABLE NYI;  0B9E 0BBF
+TAMIL SYLLABLE NYII; 0B9E 0BC0
+TAMIL SYLLABLE NYU;  0B9E 0BC1
+TAMIL SYLLABLE NYUU; 0B9E 0BC2
+TAMIL SYLLABLE NYE;  0B9E 0BC6
+TAMIL SYLLABLE NYEE; 0B9E 0BC7
+TAMIL SYLLABLE NYAI; 0B9E 0BC8
+TAMIL SYLLABLE NYO;  0B9E 0BCA
+TAMIL SYLLABLE NYOO; 0B9E 0BCB
+TAMIL SYLLABLE NYAU; 0B9E 0BCC
+
+TAMIL SYLLABLE TTAA; 0B9F 0BBE
+TAMIL SYLLABLE TTI;  0B9F 0BBF
+TAMIL SYLLABLE TTII; 0B9F 0BC0
+TAMIL SYLLABLE TTU;  0B9F 0BC1
+TAMIL SYLLABLE TTUU; 0B9F 0BC2
+TAMIL SYLLABLE TTE;  0B9F 0BC6
+TAMIL SYLLABLE TTEE; 0B9F 0BC7
+TAMIL SYLLABLE TTAI; 0B9F 0BC8
+TAMIL SYLLABLE TTO;  0B9F 0BCA
+TAMIL SYLLABLE TTOO; 0B9F 0BCB
+TAMIL SYLLABLE TTAU; 0B9F 0BCC
+
+TAMIL SYLLABLE NNAA; 0BA3 0BBE
+TAMIL SYLLABLE NNI;  0BA3 0BBF
+TAMIL SYLLABLE NNII; 0BA3 0BC0
+TAMIL SYLLABLE NNU;  0BA3 0BC1
+TAMIL SYLLABLE NNUU; 0BA3 0BC2
+TAMIL SYLLABLE NNE;  0BA3 0BC6
+TAMIL SYLLABLE NNEE; 0BA3 0BC7
+TAMIL SYLLABLE NNAI; 0BA3 0BC8
+TAMIL SYLLABLE NNO;  0BA3 0BCA
+TAMIL SYLLABLE NNOO; 0BA3 0BCB
+TAMIL SYLLABLE NNAU; 0BA3 0BCC
+
+TAMIL SYLLABLE TAA; 0BA4 0BBE
+TAMIL SYLLABLE TI;  0BA4 0BBF
+TAMIL SYLLABLE TII; 0BA4 0BC0
+TAMIL SYLLABLE TU;  0BA4 0BC1
+TAMIL SYLLABLE TUU; 0BA4 0BC2
+TAMIL SYLLABLE TE;  0BA4 0BC6
+TAMIL SYLLABLE TEE; 0BA4 0BC7
+TAMIL SYLLABLE TAI; 0BA4 0BC8
+TAMIL SYLLABLE TO;  0BA4 0BCA
+TAMIL SYLLABLE TOO; 0BA4 0BCB
+TAMIL SYLLABLE TAU; 0BA4 0BCC
+
+TAMIL SYLLABLE NAA; 0BA8 0BBE
+TAMIL SYLLABLE NI;  0BA8 0BBF
+TAMIL SYLLABLE NII; 0BA8 0BC0
+TAMIL SYLLABLE NU;  0BA8 0BC1
+TAMIL SYLLABLE NUU; 0BA8 0BC2
+TAMIL SYLLABLE NE;  0BA8 0BC6
+TAMIL SYLLABLE NEE; 0BA8 0BC7
+TAMIL SYLLABLE NAI; 0BA8 0BC8
+TAMIL SYLLABLE NO;  0BA8 0BCA
+TAMIL SYLLABLE NOO; 0BA8 0BCB
+TAMIL SYLLABLE NAU; 0BA8 0BCC
+
+TAMIL SYLLABLE PAA; 0BAA 0BBE
+TAMIL SYLLABLE PI;  0BAA 0BBF
+TAMIL SYLLABLE PII; 0BAA 0BC0
+TAMIL SYLLABLE PU;  0BAA 0BC1
+TAMIL SYLLABLE PUU; 0BAA 0BC2
+TAMIL SYLLABLE PE;  0BAA 0BC6
+TAMIL SYLLABLE PEE; 0BAA 0BC7
+TAMIL SYLLABLE PAI; 0BAA 0BC8
+TAMIL SYLLABLE PO;  0BAA 0BCA
+TAMIL SYLLABLE POO; 0BAA 0BCB
+TAMIL SYLLABLE PAU; 0BAA 0BCC
+
+TAMIL SYLLABLE MAA; 0BAE 0BBE
+TAMIL SYLLABLE MI;  0BAE 0BBF
+TAMIL SYLLABLE MII; 0BAE 0BC0
+TAMIL SYLLABLE MU;  0BAE 0BC1
+TAMIL SYLLABLE MUU; 0BAE 0BC2
+TAMIL SYLLABLE ME;  0BAE 0BC6
+TAMIL SYLLABLE MEE; 0BAE 0BC7
+TAMIL SYLLABLE MAI; 0BAE 0BC8
+TAMIL SYLLABLE MO;  0BAE 0BCA
+TAMIL SYLLABLE MOO; 0BAE 0BCB
+TAMIL SYLLABLE MAU; 0BAE 0BCC
+
+TAMIL SYLLABLE YAA; 0BAF 0BBE
+TAMIL SYLLABLE YI;  0BAF 0BBF
+TAMIL SYLLABLE YII; 0BAF 0BC0
+TAMIL SYLLABLE YU;  0BAF 0BC1
+TAMIL SYLLABLE YUU; 0BAF 0BC2
+TAMIL SYLLABLE YE;  0BAF 0BC6
+TAMIL SYLLABLE YEE; 0BAF 0BC7
+TAMIL SYLLABLE YAI; 0BAF 0BC8
+TAMIL SYLLABLE YO;  0BAF 0BCA
+TAMIL SYLLABLE YOO; 0BAF 0BCB
+TAMIL SYLLABLE YAU; 0BAF 0BCC
+
+TAMIL SYLLABLE RAA; 0BB0 0BBE
+TAMIL SYLLABLE RI;  0BB0 0BBF
+TAMIL SYLLABLE RII; 0BB0 0BC0
+TAMIL SYLLABLE RU;  0BB0 0BC1
+TAMIL SYLLABLE RUU; 0BB0 0BC2
+TAMIL SYLLABLE RE;  0BB0 0BC6
+TAMIL SYLLABLE REE; 0BB0 0BC7
+TAMIL SYLLABLE RAI; 0BB0 0BC8
+TAMIL SYLLABLE RO;  0BB0 0BCA
+TAMIL SYLLABLE ROO; 0BB0 0BCB
+TAMIL SYLLABLE RAU; 0BB0 0BCC
+
+TAMIL SYLLABLE LAA; 0BB2 0BBE
+TAMIL SYLLABLE LI;  0BB2 0BBF
+TAMIL SYLLABLE LII; 0BB2 0BC0
+TAMIL SYLLABLE LU;  0BB2 0BC1
+TAMIL SYLLABLE LUU; 0BB2 0BC2
+TAMIL SYLLABLE LE;  0BB2 0BC6
+TAMIL SYLLABLE LEE; 0BB2 0BC7
+TAMIL SYLLABLE LAI; 0BB2 0BC8
+TAMIL SYLLABLE LO;  0BB2 0BCA
+TAMIL SYLLABLE LOO; 0BB2 0BCB
+TAMIL SYLLABLE LAU; 0BB2 0BCC
+
+TAMIL SYLLABLE VAA; 0BB5 0BBE
+TAMIL SYLLABLE VI;  0BB5 0BBF
+TAMIL SYLLABLE VII; 0BB5 0BC0
+TAMIL SYLLABLE VU;  0BB5 0BC1
+TAMIL SYLLABLE VUU; 0BB5 0BC2
+TAMIL SYLLABLE VE;  0BB5 0BC6
+TAMIL SYLLABLE VEE; 0BB5 0BC7
+TAMIL SYLLABLE VAI; 0BB5 0BC8
+TAMIL SYLLABLE VO;  0BB5 0BCA
+TAMIL SYLLABLE VOO; 0BB5 0BCB
+TAMIL SYLLABLE VAU; 0BB5 0BCC
+
+TAMIL SYLLABLE LLLAA; 0BB4 0BBE
+TAMIL SYLLABLE LLLI;  0BB4 0BBF
+TAMIL SYLLABLE LLLII; 0BB4 0BC0
+TAMIL SYLLABLE LLLU;  0BB4 0BC1
+TAMIL SYLLABLE LLLUU; 0BB4 0BC2
+TAMIL SYLLABLE LLLE;  0BB4 0BC6
+TAMIL SYLLABLE LLLEE; 0BB4 0BC7
+TAMIL SYLLABLE LLLAI; 0BB4 0BC8
+TAMIL SYLLABLE LLLO;  0BB4 0BCA
+TAMIL SYLLABLE LLLOO; 0BB4 0BCB
+TAMIL SYLLABLE LLLAU; 0BB4 0BCC
+
+TAMIL SYLLABLE LLAA; 0BB3 0BBE
+TAMIL SYLLABLE LLI;  0BB3 0BBF
+TAMIL SYLLABLE LLII; 0BB3 0BC0
+TAMIL SYLLABLE LLU;  0BB3 0BC1
+TAMIL SYLLABLE LLUU; 0BB3 0BC2
+TAMIL SYLLABLE LLE;  0BB3 0BC6
+TAMIL SYLLABLE LLEE; 0BB3 0BC7
+TAMIL SYLLABLE LLAI; 0BB3 0BC8
+TAMIL SYLLABLE LLO;  0BB3 0BCA
+TAMIL SYLLABLE LLOO; 0BB3 0BCB
+TAMIL SYLLABLE LLAU; 0BB3 0BCC
+
+TAMIL SYLLABLE RRAA; 0BB1 0BBE
+TAMIL SYLLABLE RRI;  0BB1 0BBF
+TAMIL SYLLABLE RRII; 0BB1 0BC0
+TAMIL SYLLABLE RRU;  0BB1 0BC1
+TAMIL SYLLABLE RRUU; 0BB1 0BC2
+TAMIL SYLLABLE RRE;  0BB1 0BC6
+TAMIL SYLLABLE RREE; 0BB1 0BC7
+TAMIL SYLLABLE RRAI; 0BB1 0BC8
+TAMIL SYLLABLE RRO;  0BB1 0BCA
+TAMIL SYLLABLE RROO; 0BB1 0BCB
+TAMIL SYLLABLE RRAU; 0BB1 0BCC
+
+TAMIL SYLLABLE NNNAA; 0BA9 0BBE
+TAMIL SYLLABLE NNNI;  0BA9 0BBF
+TAMIL SYLLABLE NNNII; 0BA9 0BC0
+TAMIL SYLLABLE NNNU;  0BA9 0BC1
+TAMIL SYLLABLE NNNUU; 0BA9 0BC2
+TAMIL SYLLABLE NNNE;  0BA9 0BC6
+TAMIL SYLLABLE NNNEE; 0BA9 0BC7
+TAMIL SYLLABLE NNNAI; 0BA9 0BC8
+TAMIL SYLLABLE NNNO;  0BA9 0BCA
+TAMIL SYLLABLE NNNOO; 0BA9 0BCB
+TAMIL SYLLABLE NNNAU; 0BA9 0BCC
+
+TAMIL SYLLABLE JAA; 0B9C 0BBE
+TAMIL SYLLABLE JI;  0B9C 0BBF
+TAMIL SYLLABLE JII; 0B9C 0BC0
+TAMIL SYLLABLE JU;  0B9C 0BC1
+TAMIL SYLLABLE JUU; 0B9C 0BC2
+TAMIL SYLLABLE JE;  0B9C 0BC6
+TAMIL SYLLABLE JEE; 0B9C 0BC7
+TAMIL SYLLABLE JAI; 0B9C 0BC8
+TAMIL SYLLABLE JO;  0B9C 0BCA
+TAMIL SYLLABLE JOO; 0B9C 0BCB
+TAMIL SYLLABLE JAU; 0B9C 0BCC
+
+TAMIL SYLLABLE SHAA; 0BB6 0BBE
+TAMIL SYLLABLE SHI;  0BB6 0BBF
+TAMIL SYLLABLE SHII; 0BB6 0BC0
+TAMIL SYLLABLE SHU;  0BB6 0BC1
+TAMIL SYLLABLE SHUU; 0BB6 0BC2
+TAMIL SYLLABLE SHE;  0BB6 0BC6
+TAMIL SYLLABLE SHEE; 0BB6 0BC7
+TAMIL SYLLABLE SHAI; 0BB6 0BC8
+TAMIL SYLLABLE SHO;  0BB6 0BCA
+TAMIL SYLLABLE SHOO; 0BB6 0BCB
+TAMIL SYLLABLE SHAU; 0BB6 0BCC
+
+TAMIL SYLLABLE SSAA; 0BB7 0BBE
+TAMIL SYLLABLE SSI;  0BB7 0BBF
+TAMIL SYLLABLE SSII; 0BB7 0BC0
+TAMIL SYLLABLE SSU;  0BB7 0BC1
+TAMIL SYLLABLE SSUU; 0BB7 0BC2
+TAMIL SYLLABLE SSE;  0BB7 0BC6
+TAMIL SYLLABLE SSEE; 0BB7 0BC7
+TAMIL SYLLABLE SSAI; 0BB7 0BC8
+TAMIL SYLLABLE SSO;  0BB7 0BCA
+TAMIL SYLLABLE SSOO; 0BB7 0BCB
+TAMIL SYLLABLE SSAU; 0BB7 0BCC
+
+TAMIL SYLLABLE SAA; 0BB8 0BBE
+TAMIL SYLLABLE SI;  0BB8 0BBF
+TAMIL SYLLABLE SII; 0BB8 0BC0
+TAMIL SYLLABLE SU;  0BB8 0BC1
+TAMIL SYLLABLE SUU; 0BB8 0BC2
+TAMIL SYLLABLE SE;  0BB8 0BC6
+TAMIL SYLLABLE SEE; 0BB8 0BC7
+TAMIL SYLLABLE SAI; 0BB8 0BC8
+TAMIL SYLLABLE SO;  0BB8 0BCA
+TAMIL SYLLABLE SOO; 0BB8 0BCB
+TAMIL SYLLABLE SAU; 0BB8 0BCC
+
+TAMIL SYLLABLE HAA; 0BB9 0BBE
+TAMIL SYLLABLE HI;  0BB9 0BBF
+TAMIL SYLLABLE HII; 0BB9 0BC0
+TAMIL SYLLABLE HU;  0BB9 0BC1
+TAMIL SYLLABLE HUU; 0BB9 0BC2
+TAMIL SYLLABLE HE;  0BB9 0BC6
+TAMIL SYLLABLE HEE; 0BB9 0BC7
+TAMIL SYLLABLE HAI; 0BB9 0BC8
+TAMIL SYLLABLE HO;  0BB9 0BCA
+TAMIL SYLLABLE HOO; 0BB9 0BCB
+TAMIL SYLLABLE HAU; 0BB9 0BCC
+
+TAMIL SYLLABLE KSSA;  0B95 0BCD 0BB7
+TAMIL SYLLABLE KSSAA; 0B95 0BCD 0BB7 0BBE
+TAMIL SYLLABLE KSSI;  0B95 0BCD 0BB7 0BBF
+TAMIL SYLLABLE KSSII; 0B95 0BCD 0BB7 0BC0
+TAMIL SYLLABLE KSSU;  0B95 0BCD 0BB7 0BC1
+TAMIL SYLLABLE KSSUU; 0B95 0BCD 0BB7 0BC2
+TAMIL SYLLABLE KSSE;  0B95 0BCD 0BB7 0BC6
+TAMIL SYLLABLE KSSEE; 0B95 0BCD 0BB7 0BC7
+TAMIL SYLLABLE KSSAI; 0B95 0BCD 0BB7 0BC8
+TAMIL SYLLABLE KSSO;  0B95 0BCD 0BB7 0BCA
+TAMIL SYLLABLE KSSOO; 0B95 0BCD 0BB7 0BCB
+TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC
+
+TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0
+
+GEORGIAN LETTER U-BRJGU;10E3 0302
+KHMER CONSONANT SIGN COENG KA;17D2 1780
+KHMER CONSONANT SIGN COENG KHA;17D2 1781
+KHMER CONSONANT SIGN COENG KO;17D2 1782
+KHMER CONSONANT SIGN COENG KHO;17D2 1783
+KHMER CONSONANT SIGN COENG NGO;17D2 1784
+KHMER CONSONANT SIGN COENG CA;17D2 1785
+KHMER CONSONANT SIGN COENG CHA;17D2 1786
+KHMER CONSONANT SIGN COENG CO;17D2 1787
+KHMER CONSONANT SIGN COENG CHO;17D2 1788
+KHMER CONSONANT SIGN COENG NYO;17D2 1789
+KHMER CONSONANT SIGN COENG DA;17D2 178A
+KHMER CONSONANT SIGN COENG TTHA;17D2 178B
+KHMER CONSONANT SIGN COENG DO;17D2 178C
+KHMER CONSONANT SIGN COENG TTHO;17D2 178D
+KHMER CONSONANT SIGN COENG NA;17D2 178E
+KHMER CONSONANT SIGN COENG TA;17D2 178F
+KHMER CONSONANT SIGN COENG THA;17D2 1790
+KHMER CONSONANT SIGN COENG TO;17D2 1791
+KHMER CONSONANT SIGN COENG THO;17D2 1792
+KHMER CONSONANT SIGN COENG NO;17D2 1793
+KHMER CONSONANT SIGN COENG BA;17D2 1794
+KHMER CONSONANT SIGN COENG PHA;17D2 1795
+KHMER CONSONANT SIGN COENG PO;17D2 1796
+KHMER CONSONANT SIGN COENG PHO;17D2 1797
+KHMER CONSONANT SIGN COENG MO;17D2 1798
+KHMER CONSONANT SIGN COENG YO;17D2 1799
+KHMER CONSONANT SIGN COENG RO;17D2 179A
+KHMER CONSONANT SIGN COENG LO;17D2 179B
+KHMER CONSONANT SIGN COENG VO;17D2 179C
+KHMER CONSONANT SIGN COENG SHA;17D2 179D
+KHMER CONSONANT SIGN COENG SSA;17D2 179E
+KHMER CONSONANT SIGN COENG SA;17D2 179F
+KHMER CONSONANT SIGN COENG HA;17D2 17A0
+KHMER CONSONANT SIGN COENG LA;17D2 17A1
+KHMER VOWEL SIGN COENG QA;17D2 17A2
+KHMER INDEPENDENT VOWEL SIGN COENG QU;17D2 17A7
+KHMER INDEPENDENT VOWEL SIGN COENG RY;17D2 17AB
+KHMER INDEPENDENT VOWEL SIGN COENG RYY;17D2 17AC
+KHMER INDEPENDENT VOWEL SIGN COENG QE;17D2 17AF
+KHMER VOWEL SIGN OM;17BB 17C6
+KHMER VOWEL SIGN AAM;17B6 17C6
+KATAKANA LETTER AINU P;31F7 309A
+MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR;02E5 02E9
diff --git a/rpython/rlib/unicodedata/NamedSequences-6.0.0.txt b/rpython/rlib/unicodedata/NamedSequences-6.0.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NamedSequences-6.0.0.txt
@@ -0,0 +1,495 @@
+# NamedSequences-6.0.0.txt
+# Date: 2010-05-18, 10:48:00 PDT [KW]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2010 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Format:
+# Name of Sequence; Code Point Sequence for USI
+#
+# Code point sequences in the UCD use spaces as delimiters.
+# The corresponding format for a USI in ISO/IEC 10646 uses
+# comma delimitation and angle brackets. Thus, a named sequence
+# of the form:
+#
+# EXAMPLE NAME;1000 1001 1002
+#
+# in this data file, would correspond to a 10646 USI as follows:
+#
+# <1000, 1001, 1002> 
+#
+# Note: The order of entries in this file is not significant.
+# However, entries are generally in script order corresponding
+# to block order in the Unicode Standard, to make it easier
+# to find entries in the list.
+
+# ================================================
+
+LATIN CAPITAL LETTER A WITH MACRON AND GRAVE;0100 0300
+LATIN SMALL LETTER A WITH MACRON AND GRAVE;0101 0300
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW;0045 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW;0065 0329
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00C8 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00E8 0329
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00C9 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00E9 0329
+LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON;00CA 0304
+LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON;00EA 0304
+LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON;00CA 030C
+LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON;00EA 030C
+LATIN CAPITAL LETTER I WITH MACRON AND GRAVE;012A 0300
+LATIN SMALL LETTER I WITH MACRON AND GRAVE;012B 0300
+LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE;0069 0307 0301
+LATIN SMALL LETTER NG WITH TILDE ABOVE;006E 0360 0067
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW;004F 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW;006F 0329
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00D2 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00F2 0329
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00D3 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00F3 0329
+LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW;0053 0329
+LATIN SMALL LETTER S WITH VERTICAL LINE BELOW;0073 0329
+LATIN CAPITAL LETTER U WITH MACRON AND GRAVE;016A 0300
+LATIN SMALL LETTER U WITH MACRON AND GRAVE;016B 0300
+
+# Additions for Lithuanian. Provisional 2006-05-18, Approved 2007-10-19
+
+LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301
+LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301
+LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303
+LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303
+LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301
+LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301
+LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303
+LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303
+LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301
+LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301
+LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303
+LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303
+LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300
+LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303
+LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301
+LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301
+LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303
+LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303
+LATIN CAPITAL LETTER J WITH TILDE;004A 0303
+LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303
+LATIN CAPITAL LETTER L WITH TILDE;004C 0303
+LATIN SMALL LETTER L WITH TILDE;006C 0303
+LATIN CAPITAL LETTER M WITH TILDE;004D 0303
+LATIN SMALL LETTER M WITH TILDE;006D 0303
+LATIN CAPITAL LETTER R WITH TILDE;0052 0303
+LATIN SMALL LETTER R WITH TILDE;0072 0303
+LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301
+LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301
+LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303
+LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303
+LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301
+LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301
+LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303
+LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303
+
+# Entries for JIS X 0213 compatibility mapping.
+# Provisional: 2008-11-07, Approved 2010-05-14
+
+LATIN SMALL LETTER AE WITH GRAVE;00E6 0300
+LATIN SMALL LETTER OPEN O WITH GRAVE;0254 0300
+LATIN SMALL LETTER OPEN O WITH ACUTE;0254 0301
+LATIN SMALL LETTER TURNED V WITH GRAVE;028C 0300
+LATIN SMALL LETTER TURNED V WITH ACUTE;028C 0301
+LATIN SMALL LETTER SCHWA WITH GRAVE;0259 0300
+LATIN SMALL LETTER SCHWA WITH ACUTE;0259 0301
+LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE;025A 0300
+LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE;025A 0301
+
+# Entry for a Bangla entity.
+# Provisional: 2009-08-10, Approved 2010-05-14
+
+BENGALI LETTER KHINYA;0995 09CD 09B7
+
+# Additions for Tamil. Provisional 2008-02-08, Approved 2009-08-14
+#
+# A visual display of the Tamil named sequences is available
+# in the documentation for Unicode 5.2. See:
+# http://www.unicode.org/versions/Unicode5.2.0/
+
+TAMIL CONSONANT K;  0B95 0BCD
+TAMIL CONSONANT NG; 0B99 0BCD
+TAMIL CONSONANT C;  0B9A 0BCD
+TAMIL CONSONANT NY; 0B9E 0BCD
+TAMIL CONSONANT TT; 0B9F 0BCD
+TAMIL CONSONANT NN; 0BA3 0BCD
+TAMIL CONSONANT T;  0BA4 0BCD
+TAMIL CONSONANT N;  0BA8 0BCD
+TAMIL CONSONANT P;  0BAA 0BCD
+TAMIL CONSONANT M;  0BAE 0BCD
+TAMIL CONSONANT Y;  0BAF 0BCD
+TAMIL CONSONANT R;  0BB0 0BCD
+TAMIL CONSONANT L;  0BB2 0BCD
+TAMIL CONSONANT V;  0BB5 0BCD
+TAMIL CONSONANT LLL;0BB4 0BCD
+TAMIL CONSONANT LL; 0BB3 0BCD
+TAMIL CONSONANT RR; 0BB1 0BCD
+TAMIL CONSONANT NNN;0BA9 0BCD
+TAMIL CONSONANT J;  0B9C 0BCD
+TAMIL CONSONANT SH; 0BB6 0BCD
+TAMIL CONSONANT SS; 0BB7 0BCD
+TAMIL CONSONANT S;  0BB8 0BCD
+TAMIL CONSONANT H;  0BB9 0BCD
+TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD
+
+TAMIL SYLLABLE KAA; 0B95 0BBE
+TAMIL SYLLABLE KI;  0B95 0BBF
+TAMIL SYLLABLE KII; 0B95 0BC0
+TAMIL SYLLABLE KU;  0B95 0BC1
+TAMIL SYLLABLE KUU; 0B95 0BC2
+TAMIL SYLLABLE KE;  0B95 0BC6
+TAMIL SYLLABLE KEE; 0B95 0BC7
+TAMIL SYLLABLE KAI; 0B95 0BC8
+TAMIL SYLLABLE KO;  0B95 0BCA
+TAMIL SYLLABLE KOO; 0B95 0BCB
+TAMIL SYLLABLE KAU; 0B95 0BCC
+
+TAMIL SYLLABLE NGAA; 0B99 0BBE
+TAMIL SYLLABLE NGI;  0B99 0BBF
+TAMIL SYLLABLE NGII; 0B99 0BC0
+TAMIL SYLLABLE NGU;  0B99 0BC1
+TAMIL SYLLABLE NGUU; 0B99 0BC2
+TAMIL SYLLABLE NGE;  0B99 0BC6
+TAMIL SYLLABLE NGEE; 0B99 0BC7
+TAMIL SYLLABLE NGAI; 0B99 0BC8
+TAMIL SYLLABLE NGO;  0B99 0BCA
+TAMIL SYLLABLE NGOO; 0B99 0BCB
+TAMIL SYLLABLE NGAU; 0B99 0BCC
+
+TAMIL SYLLABLE CAA; 0B9A 0BBE
+TAMIL SYLLABLE CI;  0B9A 0BBF
+TAMIL SYLLABLE CII; 0B9A 0BC0
+TAMIL SYLLABLE CU;  0B9A 0BC1
+TAMIL SYLLABLE CUU; 0B9A 0BC2
+TAMIL SYLLABLE CE;  0B9A 0BC6
+TAMIL SYLLABLE CEE; 0B9A 0BC7
+TAMIL SYLLABLE CAI; 0B9A 0BC8
+TAMIL SYLLABLE CO;  0B9A 0BCA
+TAMIL SYLLABLE COO; 0B9A 0BCB
+TAMIL SYLLABLE CAU; 0B9A 0BCC
+
+TAMIL SYLLABLE NYAA; 0B9E 0BBE
+TAMIL SYLLABLE NYI;  0B9E 0BBF
+TAMIL SYLLABLE NYII; 0B9E 0BC0
+TAMIL SYLLABLE NYU;  0B9E 0BC1
+TAMIL SYLLABLE NYUU; 0B9E 0BC2
+TAMIL SYLLABLE NYE;  0B9E 0BC6
+TAMIL SYLLABLE NYEE; 0B9E 0BC7
+TAMIL SYLLABLE NYAI; 0B9E 0BC8
+TAMIL SYLLABLE NYO;  0B9E 0BCA
+TAMIL SYLLABLE NYOO; 0B9E 0BCB
+TAMIL SYLLABLE NYAU; 0B9E 0BCC
+
+TAMIL SYLLABLE TTAA; 0B9F 0BBE
+TAMIL SYLLABLE TTI;  0B9F 0BBF
+TAMIL SYLLABLE TTII; 0B9F 0BC0
+TAMIL SYLLABLE TTU;  0B9F 0BC1
+TAMIL SYLLABLE TTUU; 0B9F 0BC2
+TAMIL SYLLABLE TTE;  0B9F 0BC6
+TAMIL SYLLABLE TTEE; 0B9F 0BC7
+TAMIL SYLLABLE TTAI; 0B9F 0BC8
+TAMIL SYLLABLE TTO;  0B9F 0BCA
+TAMIL SYLLABLE TTOO; 0B9F 0BCB
+TAMIL SYLLABLE TTAU; 0B9F 0BCC
+
+TAMIL SYLLABLE NNAA; 0BA3 0BBE
+TAMIL SYLLABLE NNI;  0BA3 0BBF
+TAMIL SYLLABLE NNII; 0BA3 0BC0
+TAMIL SYLLABLE NNU;  0BA3 0BC1
+TAMIL SYLLABLE NNUU; 0BA3 0BC2
+TAMIL SYLLABLE NNE;  0BA3 0BC6
+TAMIL SYLLABLE NNEE; 0BA3 0BC7
+TAMIL SYLLABLE NNAI; 0BA3 0BC8
+TAMIL SYLLABLE NNO;  0BA3 0BCA
+TAMIL SYLLABLE NNOO; 0BA3 0BCB
+TAMIL SYLLABLE NNAU; 0BA3 0BCC
+
+TAMIL SYLLABLE TAA; 0BA4 0BBE
+TAMIL SYLLABLE TI;  0BA4 0BBF
+TAMIL SYLLABLE TII; 0BA4 0BC0
+TAMIL SYLLABLE TU;  0BA4 0BC1
+TAMIL SYLLABLE TUU; 0BA4 0BC2
+TAMIL SYLLABLE TE;  0BA4 0BC6
+TAMIL SYLLABLE TEE; 0BA4 0BC7
+TAMIL SYLLABLE TAI; 0BA4 0BC8
+TAMIL SYLLABLE TO;  0BA4 0BCA
+TAMIL SYLLABLE TOO; 0BA4 0BCB
+TAMIL SYLLABLE TAU; 0BA4 0BCC
+
+TAMIL SYLLABLE NAA; 0BA8 0BBE
+TAMIL SYLLABLE NI;  0BA8 0BBF
+TAMIL SYLLABLE NII; 0BA8 0BC0
+TAMIL SYLLABLE NU;  0BA8 0BC1
+TAMIL SYLLABLE NUU; 0BA8 0BC2
+TAMIL SYLLABLE NE;  0BA8 0BC6
+TAMIL SYLLABLE NEE; 0BA8 0BC7
+TAMIL SYLLABLE NAI; 0BA8 0BC8
+TAMIL SYLLABLE NO;  0BA8 0BCA
+TAMIL SYLLABLE NOO; 0BA8 0BCB
+TAMIL SYLLABLE NAU; 0BA8 0BCC
+
+TAMIL SYLLABLE PAA; 0BAA 0BBE
+TAMIL SYLLABLE PI;  0BAA 0BBF
+TAMIL SYLLABLE PII; 0BAA 0BC0
+TAMIL SYLLABLE PU;  0BAA 0BC1
+TAMIL SYLLABLE PUU; 0BAA 0BC2
+TAMIL SYLLABLE PE;  0BAA 0BC6
+TAMIL SYLLABLE PEE; 0BAA 0BC7
+TAMIL SYLLABLE PAI; 0BAA 0BC8
+TAMIL SYLLABLE PO;  0BAA 0BCA
+TAMIL SYLLABLE POO; 0BAA 0BCB
+TAMIL SYLLABLE PAU; 0BAA 0BCC
+
+TAMIL SYLLABLE MAA; 0BAE 0BBE
+TAMIL SYLLABLE MI;  0BAE 0BBF
+TAMIL SYLLABLE MII; 0BAE 0BC0
+TAMIL SYLLABLE MU;  0BAE 0BC1
+TAMIL SYLLABLE MUU; 0BAE 0BC2
+TAMIL SYLLABLE ME;  0BAE 0BC6
+TAMIL SYLLABLE MEE; 0BAE 0BC7
+TAMIL SYLLABLE MAI; 0BAE 0BC8
+TAMIL SYLLABLE MO;  0BAE 0BCA
+TAMIL SYLLABLE MOO; 0BAE 0BCB
+TAMIL SYLLABLE MAU; 0BAE 0BCC
+
+TAMIL SYLLABLE YAA; 0BAF 0BBE
+TAMIL SYLLABLE YI;  0BAF 0BBF
+TAMIL SYLLABLE YII; 0BAF 0BC0
+TAMIL SYLLABLE YU;  0BAF 0BC1
+TAMIL SYLLABLE YUU; 0BAF 0BC2
+TAMIL SYLLABLE YE;  0BAF 0BC6
+TAMIL SYLLABLE YEE; 0BAF 0BC7
+TAMIL SYLLABLE YAI; 0BAF 0BC8
+TAMIL SYLLABLE YO;  0BAF 0BCA
+TAMIL SYLLABLE YOO; 0BAF 0BCB
+TAMIL SYLLABLE YAU; 0BAF 0BCC
+
+TAMIL SYLLABLE RAA; 0BB0 0BBE
+TAMIL SYLLABLE RI;  0BB0 0BBF
+TAMIL SYLLABLE RII; 0BB0 0BC0
+TAMIL SYLLABLE RU;  0BB0 0BC1
+TAMIL SYLLABLE RUU; 0BB0 0BC2
+TAMIL SYLLABLE RE;  0BB0 0BC6
+TAMIL SYLLABLE REE; 0BB0 0BC7
+TAMIL SYLLABLE RAI; 0BB0 0BC8
+TAMIL SYLLABLE RO;  0BB0 0BCA
+TAMIL SYLLABLE ROO; 0BB0 0BCB
+TAMIL SYLLABLE RAU; 0BB0 0BCC
+
+TAMIL SYLLABLE LAA; 0BB2 0BBE
+TAMIL SYLLABLE LI;  0BB2 0BBF
+TAMIL SYLLABLE LII; 0BB2 0BC0
+TAMIL SYLLABLE LU;  0BB2 0BC1
+TAMIL SYLLABLE LUU; 0BB2 0BC2
+TAMIL SYLLABLE LE;  0BB2 0BC6
+TAMIL SYLLABLE LEE; 0BB2 0BC7
+TAMIL SYLLABLE LAI; 0BB2 0BC8
+TAMIL SYLLABLE LO;  0BB2 0BCA
+TAMIL SYLLABLE LOO; 0BB2 0BCB
+TAMIL SYLLABLE LAU; 0BB2 0BCC
+
+TAMIL SYLLABLE VAA; 0BB5 0BBE
+TAMIL SYLLABLE VI;  0BB5 0BBF
+TAMIL SYLLABLE VII; 0BB5 0BC0
+TAMIL SYLLABLE VU;  0BB5 0BC1
+TAMIL SYLLABLE VUU; 0BB5 0BC2
+TAMIL SYLLABLE VE;  0BB5 0BC6
+TAMIL SYLLABLE VEE; 0BB5 0BC7
+TAMIL SYLLABLE VAI; 0BB5 0BC8
+TAMIL SYLLABLE VO;  0BB5 0BCA
+TAMIL SYLLABLE VOO; 0BB5 0BCB
+TAMIL SYLLABLE VAU; 0BB5 0BCC
+
+TAMIL SYLLABLE LLLAA; 0BB4 0BBE
+TAMIL SYLLABLE LLLI;  0BB4 0BBF
+TAMIL SYLLABLE LLLII; 0BB4 0BC0
+TAMIL SYLLABLE LLLU;  0BB4 0BC1
+TAMIL SYLLABLE LLLUU; 0BB4 0BC2
+TAMIL SYLLABLE LLLE;  0BB4 0BC6
+TAMIL SYLLABLE LLLEE; 0BB4 0BC7
+TAMIL SYLLABLE LLLAI; 0BB4 0BC8
+TAMIL SYLLABLE LLLO;  0BB4 0BCA
+TAMIL SYLLABLE LLLOO; 0BB4 0BCB
+TAMIL SYLLABLE LLLAU; 0BB4 0BCC
+
+TAMIL SYLLABLE LLAA; 0BB3 0BBE
+TAMIL SYLLABLE LLI;  0BB3 0BBF
+TAMIL SYLLABLE LLII; 0BB3 0BC0
+TAMIL SYLLABLE LLU;  0BB3 0BC1
+TAMIL SYLLABLE LLUU; 0BB3 0BC2
+TAMIL SYLLABLE LLE;  0BB3 0BC6
+TAMIL SYLLABLE LLEE; 0BB3 0BC7
+TAMIL SYLLABLE LLAI; 0BB3 0BC8
+TAMIL SYLLABLE LLO;  0BB3 0BCA
+TAMIL SYLLABLE LLOO; 0BB3 0BCB
+TAMIL SYLLABLE LLAU; 0BB3 0BCC
+
+TAMIL SYLLABLE RRAA; 0BB1 0BBE
+TAMIL SYLLABLE RRI;  0BB1 0BBF
+TAMIL SYLLABLE RRII; 0BB1 0BC0
+TAMIL SYLLABLE RRU;  0BB1 0BC1
+TAMIL SYLLABLE RRUU; 0BB1 0BC2
+TAMIL SYLLABLE RRE;  0BB1 0BC6
+TAMIL SYLLABLE RREE; 0BB1 0BC7
+TAMIL SYLLABLE RRAI; 0BB1 0BC8
+TAMIL SYLLABLE RRO;  0BB1 0BCA
+TAMIL SYLLABLE RROO; 0BB1 0BCB
+TAMIL SYLLABLE RRAU; 0BB1 0BCC
+
+TAMIL SYLLABLE NNNAA; 0BA9 0BBE
+TAMIL SYLLABLE NNNI;  0BA9 0BBF
+TAMIL SYLLABLE NNNII; 0BA9 0BC0
+TAMIL SYLLABLE NNNU;  0BA9 0BC1
+TAMIL SYLLABLE NNNUU; 0BA9 0BC2
+TAMIL SYLLABLE NNNE;  0BA9 0BC6
+TAMIL SYLLABLE NNNEE; 0BA9 0BC7
+TAMIL SYLLABLE NNNAI; 0BA9 0BC8
+TAMIL SYLLABLE NNNO;  0BA9 0BCA
+TAMIL SYLLABLE NNNOO; 0BA9 0BCB
+TAMIL SYLLABLE NNNAU; 0BA9 0BCC
+
+TAMIL SYLLABLE JAA; 0B9C 0BBE
+TAMIL SYLLABLE JI;  0B9C 0BBF
+TAMIL SYLLABLE JII; 0B9C 0BC0
+TAMIL SYLLABLE JU;  0B9C 0BC1
+TAMIL SYLLABLE JUU; 0B9C 0BC2
+TAMIL SYLLABLE JE;  0B9C 0BC6
+TAMIL SYLLABLE JEE; 0B9C 0BC7
+TAMIL SYLLABLE JAI; 0B9C 0BC8
+TAMIL SYLLABLE JO;  0B9C 0BCA
+TAMIL SYLLABLE JOO; 0B9C 0BCB
+TAMIL SYLLABLE JAU; 0B9C 0BCC
+
+TAMIL SYLLABLE SHAA; 0BB6 0BBE
+TAMIL SYLLABLE SHI;  0BB6 0BBF
+TAMIL SYLLABLE SHII; 0BB6 0BC0
+TAMIL SYLLABLE SHU;  0BB6 0BC1
+TAMIL SYLLABLE SHUU; 0BB6 0BC2
+TAMIL SYLLABLE SHE;  0BB6 0BC6
+TAMIL SYLLABLE SHEE; 0BB6 0BC7
+TAMIL SYLLABLE SHAI; 0BB6 0BC8
+TAMIL SYLLABLE SHO;  0BB6 0BCA
+TAMIL SYLLABLE SHOO; 0BB6 0BCB
+TAMIL SYLLABLE SHAU; 0BB6 0BCC
+
+TAMIL SYLLABLE SSAA; 0BB7 0BBE
+TAMIL SYLLABLE SSI;  0BB7 0BBF
+TAMIL SYLLABLE SSII; 0BB7 0BC0
+TAMIL SYLLABLE SSU;  0BB7 0BC1
+TAMIL SYLLABLE SSUU; 0BB7 0BC2
+TAMIL SYLLABLE SSE;  0BB7 0BC6
+TAMIL SYLLABLE SSEE; 0BB7 0BC7
+TAMIL SYLLABLE SSAI; 0BB7 0BC8
+TAMIL SYLLABLE SSO;  0BB7 0BCA
+TAMIL SYLLABLE SSOO; 0BB7 0BCB
+TAMIL SYLLABLE SSAU; 0BB7 0BCC
+
+TAMIL SYLLABLE SAA; 0BB8 0BBE
+TAMIL SYLLABLE SI;  0BB8 0BBF
+TAMIL SYLLABLE SII; 0BB8 0BC0
+TAMIL SYLLABLE SU;  0BB8 0BC1
+TAMIL SYLLABLE SUU; 0BB8 0BC2
+TAMIL SYLLABLE SE;  0BB8 0BC6
+TAMIL SYLLABLE SEE; 0BB8 0BC7
+TAMIL SYLLABLE SAI; 0BB8 0BC8
+TAMIL SYLLABLE SO;  0BB8 0BCA
+TAMIL SYLLABLE SOO; 0BB8 0BCB
+TAMIL SYLLABLE SAU; 0BB8 0BCC
+
+TAMIL SYLLABLE HAA; 0BB9 0BBE
+TAMIL SYLLABLE HI;  0BB9 0BBF
+TAMIL SYLLABLE HII; 0BB9 0BC0
+TAMIL SYLLABLE HU;  0BB9 0BC1
+TAMIL SYLLABLE HUU; 0BB9 0BC2
+TAMIL SYLLABLE HE;  0BB9 0BC6
+TAMIL SYLLABLE HEE; 0BB9 0BC7
+TAMIL SYLLABLE HAI; 0BB9 0BC8
+TAMIL SYLLABLE HO;  0BB9 0BCA
+TAMIL SYLLABLE HOO; 0BB9 0BCB
+TAMIL SYLLABLE HAU; 0BB9 0BCC
+
+TAMIL SYLLABLE KSSA;  0B95 0BCD 0BB7
+TAMIL SYLLABLE KSSAA; 0B95 0BCD 0BB7 0BBE
+TAMIL SYLLABLE KSSI;  0B95 0BCD 0BB7 0BBF
+TAMIL SYLLABLE KSSII; 0B95 0BCD 0BB7 0BC0
+TAMIL SYLLABLE KSSU;  0B95 0BCD 0BB7 0BC1
+TAMIL SYLLABLE KSSUU; 0B95 0BCD 0BB7 0BC2
+TAMIL SYLLABLE KSSE;  0B95 0BCD 0BB7 0BC6
+TAMIL SYLLABLE KSSEE; 0B95 0BCD 0BB7 0BC7
+TAMIL SYLLABLE KSSAI; 0B95 0BCD 0BB7 0BC8
+TAMIL SYLLABLE KSSO;  0B95 0BCD 0BB7 0BCA
+TAMIL SYLLABLE KSSOO; 0B95 0BCD 0BB7 0BCB
+TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC
+
+TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0
+
+GEORGIAN LETTER U-BRJGU;10E3 0302
+KHMER CONSONANT SIGN COENG KA;17D2 1780
+KHMER CONSONANT SIGN COENG KHA;17D2 1781
+KHMER CONSONANT SIGN COENG KO;17D2 1782
+KHMER CONSONANT SIGN COENG KHO;17D2 1783
+KHMER CONSONANT SIGN COENG NGO;17D2 1784
+KHMER CONSONANT SIGN COENG CA;17D2 1785
+KHMER CONSONANT SIGN COENG CHA;17D2 1786
+KHMER CONSONANT SIGN COENG CO;17D2 1787
+KHMER CONSONANT SIGN COENG CHO;17D2 1788
+KHMER CONSONANT SIGN COENG NYO;17D2 1789
+KHMER CONSONANT SIGN COENG DA;17D2 178A
+KHMER CONSONANT SIGN COENG TTHA;17D2 178B
+KHMER CONSONANT SIGN COENG DO;17D2 178C
+KHMER CONSONANT SIGN COENG TTHO;17D2 178D
+KHMER CONSONANT SIGN COENG NA;17D2 178E
+KHMER CONSONANT SIGN COENG TA;17D2 178F
+KHMER CONSONANT SIGN COENG THA;17D2 1790
+KHMER CONSONANT SIGN COENG TO;17D2 1791
+KHMER CONSONANT SIGN COENG THO;17D2 1792
+KHMER CONSONANT SIGN COENG NO;17D2 1793
+KHMER CONSONANT SIGN COENG BA;17D2 1794
+KHMER CONSONANT SIGN COENG PHA;17D2 1795
+KHMER CONSONANT SIGN COENG PO;17D2 1796
+KHMER CONSONANT SIGN COENG PHO;17D2 1797
+KHMER CONSONANT SIGN COENG MO;17D2 1798
+KHMER CONSONANT SIGN COENG YO;17D2 1799
+KHMER CONSONANT SIGN COENG RO;17D2 179A
+KHMER CONSONANT SIGN COENG LO;17D2 179B
+KHMER CONSONANT SIGN COENG VO;17D2 179C
+KHMER CONSONANT SIGN COENG SHA;17D2 179D
+KHMER CONSONANT SIGN COENG SSA;17D2 179E
+KHMER CONSONANT SIGN COENG SA;17D2 179F
+KHMER CONSONANT SIGN COENG HA;17D2 17A0
+KHMER CONSONANT SIGN COENG LA;17D2 17A1
+KHMER VOWEL SIGN COENG QA;17D2 17A2
+KHMER INDEPENDENT VOWEL SIGN COENG QU;17D2 17A7
+KHMER INDEPENDENT VOWEL SIGN COENG RY;17D2 17AB
+KHMER INDEPENDENT VOWEL SIGN COENG RYY;17D2 17AC
+KHMER INDEPENDENT VOWEL SIGN COENG QE;17D2 17AF
+KHMER VOWEL SIGN OM;17BB 17C6
+KHMER VOWEL SIGN AAM;17B6 17C6
+
+# Entries for JIS X 0213 compatibility mapping.
+# Provisional: 2008-11-07, Approved 2010-05-14
+
+HIRAGANA LETTER BIDAKUON NGA;304B 309A
+HIRAGANA LETTER BIDAKUON NGI;304D 309A
+HIRAGANA LETTER BIDAKUON NGU;304F 309A
+HIRAGANA LETTER BIDAKUON NGE;3051 309A
+HIRAGANA LETTER BIDAKUON NGO;3053 309A
+KATAKANA LETTER BIDAKUON NGA;30AB 309A
+KATAKANA LETTER BIDAKUON NGI;30AD 309A
+KATAKANA LETTER BIDAKUON NGU;30AF 309A
+KATAKANA LETTER BIDAKUON NGE;30B1 309A
+KATAKANA LETTER BIDAKUON NGO;30B3 309A
+KATAKANA LETTER AINU CE;30BB 309A
+KATAKANA LETTER AINU TU;30C4 309A
+KATAKANA LETTER AINU TO;30C8 309A
+KATAKANA LETTER AINU P;31F7 309A
+MODIFIER LETTER EXTRA-HIGH EXTRA-LOW CONTOUR TONE BAR;02E5 02E9
+MODIFIER LETTER EXTRA-LOW EXTRA-HIGH CONTOUR TONE BAR;02E9 02E5
diff --git a/rpython/rlib/unicodedata/NamedSequences-6.2.0.txt b/rpython/rlib/unicodedata/NamedSequences-6.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/NamedSequences-6.2.0.txt
@@ -0,0 +1,504 @@
+# NamedSequences-6.2.0.txt
+# Date: 2012-05-15, 21:23:00 GMT [KW]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2012 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Format:
+# Name of Sequence; Code Point Sequence for USI
+#
+# Code point sequences in the UCD use spaces as delimiters.
+# The corresponding format for a USI in ISO/IEC 10646 uses
+# comma delimitation and angle brackets. Thus, a named sequence
+# of the form:
+#
+# EXAMPLE NAME;1000 1001 1002
+#
+# in this data file, would correspond to a 10646 USI as follows:
+#
+# <1000, 1001, 1002> 
+#
+# Note: The order of entries in this file is not significant.
+# However, entries are generally in script order corresponding
+# to block order in the Unicode Standard, to make it easier
+# to find entries in the list.
+
+# ================================================
+
+LATIN CAPITAL LETTER A WITH MACRON AND GRAVE;0100 0300
+LATIN SMALL LETTER A WITH MACRON AND GRAVE;0101 0300
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW;0045 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW;0065 0329
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00C8 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND GRAVE;00E8 0329
+LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00C9 0329
+LATIN SMALL LETTER E WITH VERTICAL LINE BELOW AND ACUTE;00E9 0329
+LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON;00CA 0304
+LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON;00EA 0304
+LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON;00CA 030C
+LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON;00EA 030C
+LATIN CAPITAL LETTER I WITH MACRON AND GRAVE;012A 0300
+LATIN SMALL LETTER I WITH MACRON AND GRAVE;012B 0300
+LATIN SMALL LETTER I WITH DOT ABOVE AND ACUTE;0069 0307 0301
+LATIN SMALL LETTER NG WITH TILDE ABOVE;006E 0360 0067
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW;004F 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW;006F 0329
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00D2 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND GRAVE;00F2 0329
+LATIN CAPITAL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00D3 0329
+LATIN SMALL LETTER O WITH VERTICAL LINE BELOW AND ACUTE;00F3 0329
+LATIN CAPITAL LETTER S WITH VERTICAL LINE BELOW;0053 0329
+LATIN SMALL LETTER S WITH VERTICAL LINE BELOW;0073 0329
+LATIN CAPITAL LETTER U WITH MACRON AND GRAVE;016A 0300
+LATIN SMALL LETTER U WITH MACRON AND GRAVE;016B 0300
+
+# Additions for Lithuanian. Provisional 2006-05-18, Approved 2007-10-19
+
+LATIN CAPITAL LETTER A WITH OGONEK AND ACUTE;0104 0301
+LATIN SMALL LETTER A WITH OGONEK AND ACUTE;0105 0301
+LATIN CAPITAL LETTER A WITH OGONEK AND TILDE;0104 0303
+LATIN SMALL LETTER A WITH OGONEK AND TILDE;0105 0303
+LATIN CAPITAL LETTER E WITH OGONEK AND ACUTE;0118 0301
+LATIN SMALL LETTER E WITH OGONEK AND ACUTE;0119 0301
+LATIN CAPITAL LETTER E WITH OGONEK AND TILDE;0118 0303
+LATIN SMALL LETTER E WITH OGONEK AND TILDE;0119 0303
+LATIN CAPITAL LETTER E WITH DOT ABOVE AND ACUTE;0116 0301
+LATIN SMALL LETTER E WITH DOT ABOVE AND ACUTE;0117 0301
+LATIN CAPITAL LETTER E WITH DOT ABOVE AND TILDE;0116 0303
+LATIN SMALL LETTER E WITH DOT ABOVE AND TILDE;0117 0303
+LATIN SMALL LETTER I WITH DOT ABOVE AND GRAVE;0069 0307 0300
+LATIN SMALL LETTER I WITH DOT ABOVE AND TILDE;0069 0307 0303
+LATIN CAPITAL LETTER I WITH OGONEK AND ACUTE;012E 0301
+LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND ACUTE;012F 0307 0301
+LATIN CAPITAL LETTER I WITH OGONEK AND TILDE;012E 0303
+LATIN SMALL LETTER I WITH OGONEK AND DOT ABOVE AND TILDE;012F 0307 0303
+LATIN CAPITAL LETTER J WITH TILDE;004A 0303
+LATIN SMALL LETTER J WITH DOT ABOVE AND TILDE;006A 0307 0303
+LATIN CAPITAL LETTER L WITH TILDE;004C 0303
+LATIN SMALL LETTER L WITH TILDE;006C 0303
+LATIN CAPITAL LETTER M WITH TILDE;004D 0303
+LATIN SMALL LETTER M WITH TILDE;006D 0303
+LATIN CAPITAL LETTER R WITH TILDE;0052 0303
+LATIN SMALL LETTER R WITH TILDE;0072 0303
+LATIN CAPITAL LETTER U WITH OGONEK AND ACUTE;0172 0301
+LATIN SMALL LETTER U WITH OGONEK AND ACUTE;0173 0301
+LATIN CAPITAL LETTER U WITH OGONEK AND TILDE;0172 0303
+LATIN SMALL LETTER U WITH OGONEK AND TILDE;0173 0303
+LATIN CAPITAL LETTER U WITH MACRON AND ACUTE;016A 0301
+LATIN SMALL LETTER U WITH MACRON AND ACUTE;016B 0301
+LATIN CAPITAL LETTER U WITH MACRON AND TILDE;016A 0303
+LATIN SMALL LETTER U WITH MACRON AND TILDE;016B 0303
+
+# Entries for JIS X 0213 compatibility mapping.
+# Provisional: 2008-11-07, Approved 2010-05-14
+
+LATIN SMALL LETTER AE WITH GRAVE;00E6 0300
+LATIN SMALL LETTER OPEN O WITH GRAVE;0254 0300
+LATIN SMALL LETTER OPEN O WITH ACUTE;0254 0301
+LATIN SMALL LETTER TURNED V WITH GRAVE;028C 0300
+LATIN SMALL LETTER TURNED V WITH ACUTE;028C 0301
+LATIN SMALL LETTER SCHWA WITH GRAVE;0259 0300
+LATIN SMALL LETTER SCHWA WITH ACUTE;0259 0301
+LATIN SMALL LETTER HOOKED SCHWA WITH GRAVE;025A 0300
+LATIN SMALL LETTER HOOKED SCHWA WITH ACUTE;025A 0301
+
+# Entry for a Bangla entity.
+# Provisional: 2009-08-10, Approved 2010-05-14
+
+BENGALI LETTER KHINYA;0995 09CD 09B7
+
+# Additions for Tamil. Provisional 2008-02-08, Approved 2009-08-14
+#
+# A visual display of the Tamil named sequences is available
+# in the documentation for the Unicode Standard. See Section 9.6, Tamil in
+# http://www.unicode.org/versions/latest/
+
+TAMIL CONSONANT K;  0B95 0BCD
+TAMIL CONSONANT NG; 0B99 0BCD
+TAMIL CONSONANT C;  0B9A 0BCD
+TAMIL CONSONANT NY; 0B9E 0BCD
+TAMIL CONSONANT TT; 0B9F 0BCD
+TAMIL CONSONANT NN; 0BA3 0BCD
+TAMIL CONSONANT T;  0BA4 0BCD
+TAMIL CONSONANT N;  0BA8 0BCD
+TAMIL CONSONANT P;  0BAA 0BCD
+TAMIL CONSONANT M;  0BAE 0BCD
+TAMIL CONSONANT Y;  0BAF 0BCD
+TAMIL CONSONANT R;  0BB0 0BCD
+TAMIL CONSONANT L;  0BB2 0BCD
+TAMIL CONSONANT V;  0BB5 0BCD
+TAMIL CONSONANT LLL;0BB4 0BCD
+TAMIL CONSONANT LL; 0BB3 0BCD
+TAMIL CONSONANT RR; 0BB1 0BCD
+TAMIL CONSONANT NNN;0BA9 0BCD
+TAMIL CONSONANT J;  0B9C 0BCD
+TAMIL CONSONANT SH; 0BB6 0BCD
+TAMIL CONSONANT SS; 0BB7 0BCD
+TAMIL CONSONANT S;  0BB8 0BCD
+TAMIL CONSONANT H;  0BB9 0BCD
+TAMIL CONSONANT KSS;0B95 0BCD 0BB7 0BCD
+
+TAMIL SYLLABLE KAA; 0B95 0BBE
+TAMIL SYLLABLE KI;  0B95 0BBF
+TAMIL SYLLABLE KII; 0B95 0BC0
+TAMIL SYLLABLE KU;  0B95 0BC1
+TAMIL SYLLABLE KUU; 0B95 0BC2
+TAMIL SYLLABLE KE;  0B95 0BC6
+TAMIL SYLLABLE KEE; 0B95 0BC7
+TAMIL SYLLABLE KAI; 0B95 0BC8
+TAMIL SYLLABLE KO;  0B95 0BCA
+TAMIL SYLLABLE KOO; 0B95 0BCB
+TAMIL SYLLABLE KAU; 0B95 0BCC
+
+TAMIL SYLLABLE NGAA; 0B99 0BBE
+TAMIL SYLLABLE NGI;  0B99 0BBF
+TAMIL SYLLABLE NGII; 0B99 0BC0
+TAMIL SYLLABLE NGU;  0B99 0BC1
+TAMIL SYLLABLE NGUU; 0B99 0BC2
+TAMIL SYLLABLE NGE;  0B99 0BC6
+TAMIL SYLLABLE NGEE; 0B99 0BC7
+TAMIL SYLLABLE NGAI; 0B99 0BC8
+TAMIL SYLLABLE NGO;  0B99 0BCA
+TAMIL SYLLABLE NGOO; 0B99 0BCB
+TAMIL SYLLABLE NGAU; 0B99 0BCC
+
+TAMIL SYLLABLE CAA; 0B9A 0BBE
+TAMIL SYLLABLE CI;  0B9A 0BBF
+TAMIL SYLLABLE CII; 0B9A 0BC0
+TAMIL SYLLABLE CU;  0B9A 0BC1
+TAMIL SYLLABLE CUU; 0B9A 0BC2
+TAMIL SYLLABLE CE;  0B9A 0BC6
+TAMIL SYLLABLE CEE; 0B9A 0BC7
+TAMIL SYLLABLE CAI; 0B9A 0BC8
+TAMIL SYLLABLE CO;  0B9A 0BCA
+TAMIL SYLLABLE COO; 0B9A 0BCB
+TAMIL SYLLABLE CAU; 0B9A 0BCC
+
+TAMIL SYLLABLE NYAA; 0B9E 0BBE
+TAMIL SYLLABLE NYI;  0B9E 0BBF
+TAMIL SYLLABLE NYII; 0B9E 0BC0
+TAMIL SYLLABLE NYU;  0B9E 0BC1
+TAMIL SYLLABLE NYUU; 0B9E 0BC2
+TAMIL SYLLABLE NYE;  0B9E 0BC6
+TAMIL SYLLABLE NYEE; 0B9E 0BC7
+TAMIL SYLLABLE NYAI; 0B9E 0BC8
+TAMIL SYLLABLE NYO;  0B9E 0BCA
+TAMIL SYLLABLE NYOO; 0B9E 0BCB
+TAMIL SYLLABLE NYAU; 0B9E 0BCC
+
+TAMIL SYLLABLE TTAA; 0B9F 0BBE
+TAMIL SYLLABLE TTI;  0B9F 0BBF
+TAMIL SYLLABLE TTII; 0B9F 0BC0
+TAMIL SYLLABLE TTU;  0B9F 0BC1
+TAMIL SYLLABLE TTUU; 0B9F 0BC2
+TAMIL SYLLABLE TTE;  0B9F 0BC6
+TAMIL SYLLABLE TTEE; 0B9F 0BC7
+TAMIL SYLLABLE TTAI; 0B9F 0BC8
+TAMIL SYLLABLE TTO;  0B9F 0BCA
+TAMIL SYLLABLE TTOO; 0B9F 0BCB
+TAMIL SYLLABLE TTAU; 0B9F 0BCC
+
+TAMIL SYLLABLE NNAA; 0BA3 0BBE
+TAMIL SYLLABLE NNI;  0BA3 0BBF
+TAMIL SYLLABLE NNII; 0BA3 0BC0
+TAMIL SYLLABLE NNU;  0BA3 0BC1
+TAMIL SYLLABLE NNUU; 0BA3 0BC2
+TAMIL SYLLABLE NNE;  0BA3 0BC6
+TAMIL SYLLABLE NNEE; 0BA3 0BC7
+TAMIL SYLLABLE NNAI; 0BA3 0BC8
+TAMIL SYLLABLE NNO;  0BA3 0BCA
+TAMIL SYLLABLE NNOO; 0BA3 0BCB
+TAMIL SYLLABLE NNAU; 0BA3 0BCC
+
+TAMIL SYLLABLE TAA; 0BA4 0BBE
+TAMIL SYLLABLE TI;  0BA4 0BBF
+TAMIL SYLLABLE TII; 0BA4 0BC0
+TAMIL SYLLABLE TU;  0BA4 0BC1
+TAMIL SYLLABLE TUU; 0BA4 0BC2
+TAMIL SYLLABLE TE;  0BA4 0BC6
+TAMIL SYLLABLE TEE; 0BA4 0BC7
+TAMIL SYLLABLE TAI; 0BA4 0BC8
+TAMIL SYLLABLE TO;  0BA4 0BCA
+TAMIL SYLLABLE TOO; 0BA4 0BCB
+TAMIL SYLLABLE TAU; 0BA4 0BCC
+
+TAMIL SYLLABLE NAA; 0BA8 0BBE
+TAMIL SYLLABLE NI;  0BA8 0BBF
+TAMIL SYLLABLE NII; 0BA8 0BC0
+TAMIL SYLLABLE NU;  0BA8 0BC1
+TAMIL SYLLABLE NUU; 0BA8 0BC2
+TAMIL SYLLABLE NE;  0BA8 0BC6
+TAMIL SYLLABLE NEE; 0BA8 0BC7
+TAMIL SYLLABLE NAI; 0BA8 0BC8
+TAMIL SYLLABLE NO;  0BA8 0BCA
+TAMIL SYLLABLE NOO; 0BA8 0BCB
+TAMIL SYLLABLE NAU; 0BA8 0BCC
+
+TAMIL SYLLABLE PAA; 0BAA 0BBE
+TAMIL SYLLABLE PI;  0BAA 0BBF
+TAMIL SYLLABLE PII; 0BAA 0BC0
+TAMIL SYLLABLE PU;  0BAA 0BC1
+TAMIL SYLLABLE PUU; 0BAA 0BC2
+TAMIL SYLLABLE PE;  0BAA 0BC6
+TAMIL SYLLABLE PEE; 0BAA 0BC7
+TAMIL SYLLABLE PAI; 0BAA 0BC8
+TAMIL SYLLABLE PO;  0BAA 0BCA
+TAMIL SYLLABLE POO; 0BAA 0BCB
+TAMIL SYLLABLE PAU; 0BAA 0BCC
+
+TAMIL SYLLABLE MAA; 0BAE 0BBE
+TAMIL SYLLABLE MI;  0BAE 0BBF
+TAMIL SYLLABLE MII; 0BAE 0BC0
+TAMIL SYLLABLE MU;  0BAE 0BC1
+TAMIL SYLLABLE MUU; 0BAE 0BC2
+TAMIL SYLLABLE ME;  0BAE 0BC6
+TAMIL SYLLABLE MEE; 0BAE 0BC7
+TAMIL SYLLABLE MAI; 0BAE 0BC8
+TAMIL SYLLABLE MO;  0BAE 0BCA
+TAMIL SYLLABLE MOO; 0BAE 0BCB
+TAMIL SYLLABLE MAU; 0BAE 0BCC
+
+TAMIL SYLLABLE YAA; 0BAF 0BBE
+TAMIL SYLLABLE YI;  0BAF 0BBF
+TAMIL SYLLABLE YII; 0BAF 0BC0
+TAMIL SYLLABLE YU;  0BAF 0BC1
+TAMIL SYLLABLE YUU; 0BAF 0BC2
+TAMIL SYLLABLE YE;  0BAF 0BC6
+TAMIL SYLLABLE YEE; 0BAF 0BC7
+TAMIL SYLLABLE YAI; 0BAF 0BC8
+TAMIL SYLLABLE YO;  0BAF 0BCA
+TAMIL SYLLABLE YOO; 0BAF 0BCB
+TAMIL SYLLABLE YAU; 0BAF 0BCC
+
+TAMIL SYLLABLE RAA; 0BB0 0BBE
+TAMIL SYLLABLE RI;  0BB0 0BBF
+TAMIL SYLLABLE RII; 0BB0 0BC0
+TAMIL SYLLABLE RU;  0BB0 0BC1
+TAMIL SYLLABLE RUU; 0BB0 0BC2
+TAMIL SYLLABLE RE;  0BB0 0BC6
+TAMIL SYLLABLE REE; 0BB0 0BC7
+TAMIL SYLLABLE RAI; 0BB0 0BC8
+TAMIL SYLLABLE RO;  0BB0 0BCA
+TAMIL SYLLABLE ROO; 0BB0 0BCB
+TAMIL SYLLABLE RAU; 0BB0 0BCC
+
+TAMIL SYLLABLE LAA; 0BB2 0BBE
+TAMIL SYLLABLE LI;  0BB2 0BBF
+TAMIL SYLLABLE LII; 0BB2 0BC0
+TAMIL SYLLABLE LU;  0BB2 0BC1
+TAMIL SYLLABLE LUU; 0BB2 0BC2
+TAMIL SYLLABLE LE;  0BB2 0BC6
+TAMIL SYLLABLE LEE; 0BB2 0BC7
+TAMIL SYLLABLE LAI; 0BB2 0BC8
+TAMIL SYLLABLE LO;  0BB2 0BCA
+TAMIL SYLLABLE LOO; 0BB2 0BCB
+TAMIL SYLLABLE LAU; 0BB2 0BCC
+
+TAMIL SYLLABLE VAA; 0BB5 0BBE
+TAMIL SYLLABLE VI;  0BB5 0BBF
+TAMIL SYLLABLE VII; 0BB5 0BC0
+TAMIL SYLLABLE VU;  0BB5 0BC1
+TAMIL SYLLABLE VUU; 0BB5 0BC2
+TAMIL SYLLABLE VE;  0BB5 0BC6
+TAMIL SYLLABLE VEE; 0BB5 0BC7
+TAMIL SYLLABLE VAI; 0BB5 0BC8
+TAMIL SYLLABLE VO;  0BB5 0BCA
+TAMIL SYLLABLE VOO; 0BB5 0BCB
+TAMIL SYLLABLE VAU; 0BB5 0BCC
+
+TAMIL SYLLABLE LLLAA; 0BB4 0BBE
+TAMIL SYLLABLE LLLI;  0BB4 0BBF
+TAMIL SYLLABLE LLLII; 0BB4 0BC0
+TAMIL SYLLABLE LLLU;  0BB4 0BC1
+TAMIL SYLLABLE LLLUU; 0BB4 0BC2
+TAMIL SYLLABLE LLLE;  0BB4 0BC6
+TAMIL SYLLABLE LLLEE; 0BB4 0BC7
+TAMIL SYLLABLE LLLAI; 0BB4 0BC8
+TAMIL SYLLABLE LLLO;  0BB4 0BCA
+TAMIL SYLLABLE LLLOO; 0BB4 0BCB
+TAMIL SYLLABLE LLLAU; 0BB4 0BCC
+
+TAMIL SYLLABLE LLAA; 0BB3 0BBE
+TAMIL SYLLABLE LLI;  0BB3 0BBF
+TAMIL SYLLABLE LLII; 0BB3 0BC0
+TAMIL SYLLABLE LLU;  0BB3 0BC1
+TAMIL SYLLABLE LLUU; 0BB3 0BC2
+TAMIL SYLLABLE LLE;  0BB3 0BC6
+TAMIL SYLLABLE LLEE; 0BB3 0BC7
+TAMIL SYLLABLE LLAI; 0BB3 0BC8
+TAMIL SYLLABLE LLO;  0BB3 0BCA
+TAMIL SYLLABLE LLOO; 0BB3 0BCB
+TAMIL SYLLABLE LLAU; 0BB3 0BCC
+
+TAMIL SYLLABLE RRAA; 0BB1 0BBE
+TAMIL SYLLABLE RRI;  0BB1 0BBF
+TAMIL SYLLABLE RRII; 0BB1 0BC0
+TAMIL SYLLABLE RRU;  0BB1 0BC1
+TAMIL SYLLABLE RRUU; 0BB1 0BC2
+TAMIL SYLLABLE RRE;  0BB1 0BC6
+TAMIL SYLLABLE RREE; 0BB1 0BC7
+TAMIL SYLLABLE RRAI; 0BB1 0BC8
+TAMIL SYLLABLE RRO;  0BB1 0BCA
+TAMIL SYLLABLE RROO; 0BB1 0BCB
+TAMIL SYLLABLE RRAU; 0BB1 0BCC
+
+TAMIL SYLLABLE NNNAA; 0BA9 0BBE
+TAMIL SYLLABLE NNNI;  0BA9 0BBF
+TAMIL SYLLABLE NNNII; 0BA9 0BC0
+TAMIL SYLLABLE NNNU;  0BA9 0BC1
+TAMIL SYLLABLE NNNUU; 0BA9 0BC2
+TAMIL SYLLABLE NNNE;  0BA9 0BC6
+TAMIL SYLLABLE NNNEE; 0BA9 0BC7
+TAMIL SYLLABLE NNNAI; 0BA9 0BC8
+TAMIL SYLLABLE NNNO;  0BA9 0BCA
+TAMIL SYLLABLE NNNOO; 0BA9 0BCB
+TAMIL SYLLABLE NNNAU; 0BA9 0BCC
+
+TAMIL SYLLABLE JAA; 0B9C 0BBE
+TAMIL SYLLABLE JI;  0B9C 0BBF
+TAMIL SYLLABLE JII; 0B9C 0BC0
+TAMIL SYLLABLE JU;  0B9C 0BC1
+TAMIL SYLLABLE JUU; 0B9C 0BC2
+TAMIL SYLLABLE JE;  0B9C 0BC6
+TAMIL SYLLABLE JEE; 0B9C 0BC7
+TAMIL SYLLABLE JAI; 0B9C 0BC8
+TAMIL SYLLABLE JO;  0B9C 0BCA
+TAMIL SYLLABLE JOO; 0B9C 0BCB
+TAMIL SYLLABLE JAU; 0B9C 0BCC
+
+TAMIL SYLLABLE SHAA; 0BB6 0BBE
+TAMIL SYLLABLE SHI;  0BB6 0BBF
+TAMIL SYLLABLE SHII; 0BB6 0BC0
+TAMIL SYLLABLE SHU;  0BB6 0BC1
+TAMIL SYLLABLE SHUU; 0BB6 0BC2
+TAMIL SYLLABLE SHE;  0BB6 0BC6
+TAMIL SYLLABLE SHEE; 0BB6 0BC7
+TAMIL SYLLABLE SHAI; 0BB6 0BC8
+TAMIL SYLLABLE SHO;  0BB6 0BCA
+TAMIL SYLLABLE SHOO; 0BB6 0BCB
+TAMIL SYLLABLE SHAU; 0BB6 0BCC


More information about the pypy-commit mailing list