From birkenfeld at users.sourceforge.net Sat Oct 1 18:32:34 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Sat, 1 Oct 2005 18:32:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1381,1.1382 Message-ID: <20051001163234.DF5BB1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1616/Misc Modified Files: NEWS Log Message: bug [ 729103 ] Cannot retrieve name of super object Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1381 retrieving revision 1.1382 diff -u -d -r1.1381 -r1.1382 --- NEWS 30 Sep 2005 04:46:49 -0000 1.1381 +++ NEWS 1 Oct 2005 16:32:31 -0000 1.1382 @@ -245,6 +245,9 @@ Library ------- +- Bug #729103: pydoc.py: Fix docother() method to accept additional + "parent" argument. + - Patch #1300515: xdrlib.py: Fix pack_fstring() to really use null bytes for padding. 
From birkenfeld at users.sourceforge.net Sat Oct 1 18:32:35 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Sat, 1 Oct 2005 18:32:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib pydoc.py,1.106,1.107 Message-ID: <20051001163235.0AD511E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1616/Lib Modified Files: pydoc.py Log Message: bug [ 729103 ] Cannot retrieve name of super object Index: pydoc.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/pydoc.py,v retrieving revision 1.106 retrieving revision 1.107 diff -u -d -r1.106 -r1.107 --- pydoc.py 22 Jul 2005 21:52:25 -0000 1.106 +++ pydoc.py 1 Oct 2005 16:32:31 -0000 1.107 @@ -1079,7 +1079,7 @@ if data: contents = [] for key, value in data: - contents.append(self.docother(value, key, name, 70)) + contents.append(self.docother(value, key, name, maxlen=70)) result = result + self.section('DATA', join(contents, '\n')) if hasattr(object, '__version__'): @@ -1164,7 +1164,7 @@ else: doc = None push(self.docother(getattr(object, name), - name, mod, 70, doc) + '\n') + name, mod, maxlen=70, doc=doc) + '\n') return attrs attrs = filter(lambda (name, kind, cls, value): visiblename(name), @@ -1272,7 +1272,7 @@ """Produce text documentation for a property.""" return self._docdescriptor(name, object, mod) - def docother(self, object, name=None, mod=None, maxlen=None, doc=None): + def docother(self, object, name=None, mod=None, parent=None, maxlen=None, doc=None): """Produce text documentation for a data object.""" repr = self.repr(object) if maxlen: From birkenfeld at users.sourceforge.net Sat Oct 1 18:32:43 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Sat, 1 Oct 2005 18:32:43 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS, 1.1193.2.115, 1.1193.2.116 Message-ID: 
<20051001163243.BBFE11E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1642/Misc Modified Files: Tag: release24-maint NEWS Log Message: backport bug [ 729103 ] Cannot retrieve name of super object Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1193.2.115 retrieving revision 1.1193.2.116 diff -u -d -r1.1193.2.115 -r1.1193.2.116 --- NEWS 30 Sep 2005 04:58:23 -0000 1.1193.2.115 +++ NEWS 1 Oct 2005 16:32:40 -0000 1.1193.2.116 @@ -20,6 +20,9 @@ Library ------- +- Bug #729103: pydoc.py: Fix docother() method to accept additional + "parent" argument. + - Patch #1300515: xdrlib.py: Fix pack_fstring() to really use null bytes for padding. From birkenfeld at users.sourceforge.net Sat Oct 1 18:32:43 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Sat, 1 Oct 2005 18:32:43 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib pydoc.py, 1.100.2.4, 1.100.2.5 Message-ID: <20051001163243.C6B651E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1642/Lib Modified Files: Tag: release24-maint pydoc.py Log Message: backport bug [ 729103 ] Cannot retrieve name of super object Index: pydoc.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/pydoc.py,v retrieving revision 1.100.2.4 retrieving revision 1.100.2.5 diff -u -d -r1.100.2.4 -r1.100.2.5 --- pydoc.py 22 Jul 2005 21:52:33 -0000 1.100.2.4 +++ pydoc.py 1 Oct 2005 16:32:40 -0000 1.100.2.5 @@ -1078,7 +1078,7 @@ if data: contents = [] for key, value in data: - contents.append(self.docother(value, key, name, 70)) + contents.append(self.docother(value, key, name, maxlen=70)) result = result + self.section('DATA', join(contents, '\n')) if hasattr(object, '__version__'): @@ -1163,7 
+1163,7 @@ else: doc = None push(self.docother(getattr(object, name), - name, mod, 70, doc) + '\n') + name, mod, maxlen=70, doc=doc) + '\n') return attrs attrs = filter(lambda (name, kind, cls, value): visiblename(name), @@ -1282,7 +1282,7 @@ """Produce text documentation for a property.""" return self._docproperty(name, object, mod) - def docother(self, object, name=None, mod=None, maxlen=None, doc=None): + def docother(self, object, name=None, mod=None, parent=None, maxlen=None, doc=None): """Produce text documentation for a data object.""" repr = self.repr(object) if maxlen: From birkenfeld at users.sourceforge.net Sat Oct 1 19:06:03 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Sat, 1 Oct 2005 19:06:03 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects stringobject.c, 2.232, 2.233 Message-ID: <20051001170603.B916D1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7536/Objects Modified Files: stringobject.c Log Message: Fix PyString_Format so that the "%s" format works again when Unicode is not enabled. 
Index: stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.232 retrieving revision 2.233 diff -u -d -r2.232 -r2.233 --- stringobject.c 31 Aug 2005 23:02:05 -0000 2.232 +++ stringobject.c 1 Oct 2005 17:06:00 -0000 2.233 @@ -4083,7 +4083,9 @@ argidx = argidx_start; goto unicode; } +#endif temp = _PyObject_Str(v); +#ifdef Py_USING_UNICODE if (temp != NULL && PyUnicode_Check(temp)) { Py_DECREF(temp); fmt = fmt_start; From nnorwitz at users.sourceforge.net Sun Oct 2 03:48:54 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 2 Oct 2005 03:48:54 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1382,1.1383 Message-ID: <20051002014854.21CA51E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8132/Misc Modified Files: NEWS Log Message: - Fix segfault with invalid coding. - SF Bug #772896, unknown encoding results in MemoryError, which is not helpful I will only backport the segfault fix. I'll let Anthony decide if he wants the other changes backported. I will do the backport if asked. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1382 retrieving revision 1.1383 diff -u -d -r1.1382 -r1.1383 --- NEWS 1 Oct 2005 16:32:31 -0000 1.1382 +++ NEWS 2 Oct 2005 01:48:49 -0000 1.1383 @@ -12,6 +12,10 @@ Core and builtins ----------------- +- Fix segfault with invalid coding. + +- SF bug #772896: unknown encoding results in MemoryError. + - All iterators now have a Boolean value of true. Formerly, some iterators supported a __len__() method which evaluated to False when the iterator was empty. 
From nnorwitz at users.sourceforge.net Sun Oct 2 03:48:54 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 2 Oct 2005 03:48:54 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Parser parsetok.c, 2.36, 2.37 pgenmain.c, 2.31, 2.32 tokenizer.c, 2.78, 2.79 Message-ID: <20051002014854.2EC581E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Parser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8132/Parser Modified Files: parsetok.c pgenmain.c tokenizer.c Log Message: - Fix segfault with invalid coding. - SF Bug #772896, unknown encoding results in MemoryError, which is not helpful I will only backport the segfault fix. I'll let Anthony decide if he wants the other changes backported. I will do the backport if asked. Index: parsetok.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/parsetok.c,v retrieving revision 2.36 retrieving revision 2.37 diff -u -d -r2.36 -r2.37 --- parsetok.c 8 Jul 2004 01:54:07 -0000 2.36 +++ parsetok.c 2 Oct 2005 01:48:51 -0000 2.37 @@ -42,7 +42,7 @@ initerr(err_ret, filename); if ((tok = PyTokenizer_FromString(s)) == NULL) { - err_ret->error = E_NOMEM; + err_ret->error = PyErr_Occurred() ? 
E_DECODE : E_NOMEM; return NULL; } Index: pgenmain.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/pgenmain.c,v retrieving revision 2.31 retrieving revision 2.32 diff -u -d -r2.31 -r2.32 --- pgenmain.c 7 Feb 2004 13:53:46 -0000 2.31 +++ pgenmain.c 2 Oct 2005 01:48:51 -0000 2.32 @@ -116,6 +116,13 @@ return g; } +/* Can't happen in pgen */ +PyObject* +PyErr_Occurred() +{ + return 0; +} + void Py_FatalError(const char *msg) { Index: tokenizer.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.c,v retrieving revision 2.78 retrieving revision 2.79 diff -u -d -r2.78 -r2.79 --- tokenizer.c 12 Jul 2005 21:53:43 -0000 2.78 +++ tokenizer.c 2 Oct 2005 01:48:51 -0000 2.79 @@ -603,8 +603,11 @@ if (tok->enc != NULL) { assert(utf8 == NULL); utf8 = translate_into_utf8(str, tok->enc); - if (utf8 == NULL) + if (utf8 == NULL) { + PyErr_Format(PyExc_SyntaxError, + "unknown encoding: %s", tok->enc); return NULL; + } str = PyString_AsString(utf8); } #endif From nnorwitz at users.sourceforge.net Sun Oct 2 03:48:54 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 2 Oct 2005 03:48:54 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test bad_coding.py, NONE, 1.1 test_coding.py, NONE, 1.1 Message-ID: <20051002014854.371E21E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8132/Lib/test Added Files: bad_coding.py test_coding.py Log Message: - Fix segfault with invalid coding. - SF Bug #772896, unknown encoding results in MemoryError, which is not helpful I will only backport the segfault fix. I'll let Anthony decide if he wants the other changes backported. I will do the backport if asked. 
--- NEW FILE: bad_coding.py --- # -*- coding: uft-8 -*- --- NEW FILE: test_coding.py --- import test.test_support, unittest import os class CodingTest(unittest.TestCase): def test_bad_coding(self): module_name = 'bad_coding' self.assertRaises(SyntaxError, __import__, 'test.' + module_name) path = os.path.dirname(__file__) filename = os.path.join(path, module_name + '.py') fp = open(filename) text = fp.read() fp.close() self.assertRaises(SyntaxError, compile, text, filename, 'exec') def test_main(): test.test_support.run_unittest(CodingTest) if __name__ == "__main__": test_main() From nnorwitz at users.sourceforge.net Sun Oct 2 03:48:54 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 2 Oct 2005 03:48:54 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python pythonrun.c,2.216,2.217 Message-ID: <20051002014854.407DF1E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8132/Python Modified Files: pythonrun.c Log Message: - Fix segfault with invalid coding. - SF Bug #772896, unknown encoding results in MemoryError, which is not helpful I will only backport the segfault fix. I'll let Anthony decide if he wants the other changes backported. I will do the backport if asked. 
Index: pythonrun.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v retrieving revision 2.216 retrieving revision 2.217 diff -u -d -r2.216 -r2.217 --- pythonrun.c 24 Aug 2005 08:39:24 -0000 2.216 +++ pythonrun.c 2 Oct 2005 01:48:50 -0000 2.217 @@ -1487,7 +1487,7 @@ msg = "unknown decode error"; Py_DECREF(type); Py_DECREF(value); - Py_DECREF(tb); + Py_XDECREF(tb); break; } case E_LINECONT: From nnorwitz at users.sourceforge.net Sun Oct 2 03:51:59 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 2 Oct 2005 03:51:59 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python pythonrun.c, 2.211.2.2, 2.211.2.3 Message-ID: <20051002015159.D2C811E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8741/Python Modified Files: Tag: release24-maint pythonrun.c Log Message: Backported: Fix segfault with invalid coding. 
(See SF bug: 772896 for another fix) Index: pythonrun.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v retrieving revision 2.211.2.2 retrieving revision 2.211.2.3 diff -u -d -r2.211.2.2 -r2.211.2.3 --- pythonrun.c 24 Aug 2005 08:39:46 -0000 2.211.2.2 +++ pythonrun.c 2 Oct 2005 01:51:56 -0000 2.211.2.3 @@ -1484,7 +1484,7 @@ msg = "unknown decode error"; Py_DECREF(type); Py_DECREF(value); - Py_DECREF(tb); + Py_XDECREF(tb); break; } default: From nnorwitz at users.sourceforge.net Sun Oct 2 03:52:00 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 2 Oct 2005 03:52:00 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS, 1.1193.2.116, 1.1193.2.117 Message-ID: <20051002015200.1DDF21E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8741/Misc Modified Files: Tag: release24-maint NEWS Log Message: Backported: Fix segfault with invalid coding. (See SF bug: 772896 for another fix) Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1193.2.116 retrieving revision 1.1193.2.117 diff -u -d -r1.1193.2.116 -r1.1193.2.117 --- NEWS 1 Oct 2005 16:32:40 -0000 1.1193.2.116 +++ NEWS 2 Oct 2005 01:51:56 -0000 1.1193.2.117 @@ -9,6 +9,11 @@ *Release date: XX-XX-200X* +Core and builtins +----------------- + +- Fix segfault with invalid coding. 
+ Extension Modules ----------------- From kbk at users.sourceforge.net Mon Oct 3 01:36:49 2005 From: kbk at users.sourceforge.net (kbk@users.sourceforge.net) Date: Mon, 3 Oct 2005 01:36:49 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/idlelib CodeContext.py, 1.4, 1.5 NEWS.txt, 1.62, 1.63 Message-ID: <20051002233649.C85B11E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/idlelib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32569 Modified Files: CodeContext.py NEWS.txt Log Message: Increased performance in CodeContext extension Patch 936169 Noam Raphael Index: CodeContext.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/CodeContext.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- CodeContext.py 6 Jun 2004 01:29:21 -0000 1.4 +++ CodeContext.py 2 Oct 2005 23:36:46 -0000 1.5 @@ -14,10 +14,10 @@ from configHandler import idleConf from sets import Set import re +from sys import maxint as INFINITY BLOCKOPENERS = Set(["class", "def", "elif", "else", "except", "finally", "for", "if", "try", "while"]) -INFINITY = 1 << 30 UPDATEINTERVAL = 100 # millisec FONTUPDATEINTERVAL = 1000 # millisec @@ -37,8 +37,12 @@ self.text = editwin.text self.textfont = self.text["font"] self.label = None - # Dummy line, which starts the "block" of the whole document: - self.info = list(self.interesting_lines(1)) + # self.info holds information about the context lines of line number + # self.lastfirstline. The information is a tuple of the line's + # indentation, the line's text and the keyword at the beginning of the + # line, as returned by get_line_info. At the beginning of the list + # there's a dummy line, which starts the "block" of the whole document. 
+ self.info = [(0, -1, "", False)] self.lastfirstline = 1 visible = idleConf.GetOption("extensions", "CodeContext", "visible", type="bool", default=False) @@ -73,14 +77,7 @@ If the line does not start a block, the keyword value is False. The indentation of empty lines (or comment lines) is INFINITY. - There is a dummy block start, with indentation -1 and text "". - - Return the indent level, text (including leading whitespace), - and the block opening keyword. - """ - if linenum == 0: - return -1, "", True text = self.text.get("%d.0" % linenum, "%d.end" % linenum) spaces, firstword = getspacesfirstword(text) opener = firstword in BLOCKOPENERS and firstword @@ -90,40 +87,59 @@ indent = len(spaces) return indent, text, opener - def interesting_lines(self, firstline): - """Generator which yields context lines, starting at firstline.""" + def interesting_lines(self, firstline, stopline=1, stopindent=0): + """ + Find the context lines, starting at firstline. + Will not return lines whose index is smaller than stopline or whose + indentation is smaller than stopindent. + stopline should always be >= 1, so the dummy block start will never + be returned (This function doesn't know what to do about it.) + Returns a list with the context lines, starting from the first (top), + and a number which all context lines above the inspected region should + have a smaller indentation than it. + """ + lines = [] # The indentation level we are currently in: lastindent = INFINITY # For a line to be interesting, it must begin with a block opening # keyword, and have less indentation than lastindent. 
- for line_index in xrange(firstline, -1, -1): + for line_index in xrange(firstline, stopline-1, -1): indent, text, opener = self.get_line_info(line_index) if indent < lastindent: lastindent = indent if opener in ("else", "elif"): # We also show the if statement lastindent += 1 - if opener and line_index < firstline: - yield line_index, text + if opener and line_index < firstline and indent >= stopindent: + lines.append((line_index, indent, text, opener)) + if lastindent <= stopindent: + break + lines.reverse() + return lines, lastindent def update_label(self): + """Update the CodeContext label, if needed. + """ firstline = int(self.text.index("@0,0").split('.')[0]) if self.lastfirstline == firstline: return - self.lastfirstline = firstline - tmpstack = [] - for line_index, text in self.interesting_lines(firstline): - # Remove irrelevant self.info items, and when we reach a relevant - # item (which must happen because of the dummy element), break. - while self.info[-1][0] > line_index: + if self.lastfirstline < firstline: + lines, lastindent = self.interesting_lines(firstline, + self.lastfirstline) + while self.info[-1][1] >= lastindent: del self.info[-1] - if self.info[-1][0] == line_index: - break - tmpstack.append((line_index, text)) - while tmpstack: - self.info.append(tmpstack.pop()) + self.info.extend(lines) + else: + stopindent = self.info[-1][1] + 1 + while self.info[-1][0] >= firstline: + stopindent = self.info[-1][1] + del self.info[-1] + lines, lastindent = self.interesting_lines( + firstline, self.info[-1][0]+1, stopindent) + self.info.extend(lines) + self.lastfirstline = firstline lines = [""] * max(0, self.numlines - len(self.info)) + \ - [x[1] for x in self.info[-self.numlines:]] + [x[2] for x in self.info[-self.numlines:]] self.label["text"] = '\n'.join(lines) def timer_event(self): Index: NEWS.txt =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/NEWS.txt,v retrieving revision 
1.62 retrieving revision 1.63 diff -u -d -r1.62 -r1.63 --- NEWS.txt 23 Aug 2005 02:27:23 -0000 1.62 +++ NEWS.txt 2 Oct 2005 23:36:46 -0000 1.63 @@ -3,6 +3,8 @@ *Release date: XX-XXX-2005* +- Increased performance in CodeContext extension Patch 936169 Noam Raphael + - Mac line endings were incorrect when pasting code from some browsers when using X11 and the Fink distribution. Python Bug 1263656. @@ -148,12 +150,12 @@ - If nulls somehow got into the strings in recent-files.lst EditorWindow.update_recent_files_list() was failing. Python Bug 931336. -- If the normal background is changed via Configure/Highlighting, it will update - immediately, thanks to the previously mentioned patch by Nigel Rowe. +- If the normal background is changed via Configure/Highlighting, it will + update immediately, thanks to the previously mentioned patch by Nigel Rowe. - Add a highlight theme for builtin keywords. Python Patch 805830 Nigel Rowe - This also fixed IDLEfork bug [ 693418 ] Normal text background color not refreshed - and Python bug [897872 ] Unknown color name on HP-UX + This also fixed IDLEfork bug [ 693418 ] Normal text background color not + refreshed and Python bug [897872 ] Unknown color name on HP-UX - rpc.py:SocketIO - Large modules were generating large pickles when downloaded to the execution server. The return of the OK response from the subprocess From nnorwitz at users.sourceforge.net Mon Oct 3 02:36:19 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:36:19 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib asttable.tex,1.2,1.3 Message-ID: <20051003003619.F11F81E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12751/lib Modified Files: asttable.tex Log Message: SF patch #1227568, bug #1219273, Expression AST node not documented. Backport candidate if anyone cares. 
Index: asttable.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/asttable.tex,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- asttable.tex 2 Aug 2004 06:09:52 -0000 1.2 +++ asttable.tex 3 Oct 2005 00:36:16 -0000 1.3 @@ -89,6 +89,8 @@ \lineiii{Ellipsis}{}{} \hline +\lineiii{Expression}{\member{node}}{} + \lineiii{Exec}{\member{expr}}{} \lineiii{}{\member{locals}}{} \lineiii{}{\member{globals}}{} From nnorwitz at users.sourceforge.net Mon Oct 3 02:38:49 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:38:49 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc ACKS,1.297,1.298 Message-ID: <20051003003849.CFCE11E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13020/Misc Modified Files: ACKS Log Message: SF patch #1227568, Expression AST node not documented. Index: ACKS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/ACKS,v retrieving revision 1.297 retrieving revision 1.298 diff -u -d -r1.297 -r1.298 --- ACKS 30 Sep 2005 04:46:49 -0000 1.297 +++ ACKS 3 Oct 2005 00:38:46 -0000 1.298 @@ -316,6 +316,7 @@ Bob Kahn Kurt B. Kaiser Tamito Kajiyama +Peter van Kampen Jacob Kaplan-Moss Lou Kates Sebastien Keim From nnorwitz at users.sourceforge.net Mon Oct 3 02:44:10 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:44:10 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libsocket.tex,1.89,1.90 Message-ID: <20051003004410.080C91E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13761/Doc/lib Modified Files: libsocket.tex Log Message: SF Bug #1308042, platform dependent behaviour (Windoze of course) in UDP sockets. 
Will backport Index: libsocket.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsocket.tex,v retrieving revision 1.89 retrieving revision 1.90 diff -u -d -r1.89 -r1.90 --- libsocket.tex 11 Mar 2005 00:04:17 -0000 1.89 +++ libsocket.tex 3 Oct 2005 00:44:06 -0000 1.90 @@ -7,7 +7,8 @@ This module provides access to the BSD \emph{socket} interface. It is available on all modern \UNIX{} systems, Windows, MacOS, BeOS, -OS/2, and probably additional platforms. +OS/2, and probably additional platforms. \note{Some behavior may be +platform dependent, since calls are made to the operating system socket APIs.} For an introduction to socket programming (in C), see the following papers: \citetitle{An Introductory 4.3BSD Interprocess Communication From nnorwitz at users.sourceforge.net Mon Oct 3 02:44:48 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:44:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libsocket.tex, 1.88.2.1, 1.88.2.2 Message-ID: <20051003004448.9EE3C1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13885/Doc/lib Modified Files: Tag: release24-maint libsocket.tex Log Message: Backport: SF Bug #1308042, platform dependent behaviour (Windoze of course) in UDP sockets. Index: libsocket.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsocket.tex,v retrieving revision 1.88.2.1 retrieving revision 1.88.2.2 diff -u -d -r1.88.2.1 -r1.88.2.2 --- libsocket.tex 12 Mar 2005 06:15:54 -0000 1.88.2.1 +++ libsocket.tex 3 Oct 2005 00:44:45 -0000 1.88.2.2 @@ -7,7 +7,8 @@ This module provides access to the BSD \emph{socket} interface. It is available on all modern \UNIX{} systems, Windows, MacOS, BeOS, -OS/2, and probably additional platforms. 
+OS/2, and probably additional platforms. \note{Some behavior may be +platform dependent, since calls are made to the operating system socket APIs.} For an introduction to socket programming (in C), see the following papers: \citetitle{An Introductory 4.3BSD Interprocess Communication From nnorwitz at users.sourceforge.net Mon Oct 3 02:55:00 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:55:00 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_cmd_line.py, NONE, 1.1 Message-ID: <20051003005500.B7DBB1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15556/Lib/test Added Files: test_cmd_line.py Log Message: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. Will backport. --- NEW FILE: test_cmd_line.py --- import test.test_support, unittest import sys import popen2 class CmdLineTest(unittest.TestCase): def start_python(self, cmd_line): outfp, infp = popen2.popen4('%s %s' % (sys.executable, cmd_line)) infp.close() data = outfp.read() outfp.close() return data def test_directories(self): self.assertTrue('is a directory' in self.start_python('.')) self.assertTrue('is a directory' in self.start_python('< .')) def verify_valid_flag(self, cmd_line): data = self.start_python(cmd_line) self.assertTrue(data.endswith('\n')) self.assertTrue('Traceback' not in data) def test_environment(self): self.verify_valid_flag('-E') def test_optimize(self): self.verify_valid_flag('-O') self.verify_valid_flag('-OO') def test_q(self): self.verify_valid_flag('-Qold') self.verify_valid_flag('-Qnew') self.verify_valid_flag('-Qwarn') self.verify_valid_flag('-Qwarnall') def test_site_flag(self): self.verify_valid_flag('-S') def test_usage(self): self.assertTrue('usage' in self.start_python('-h')) def test_version(self): version = 'Python %d.%d' % 
sys.version_info[:2] self.assertTrue(self.start_python('-V').startswith(version)) def test_main(): test.test_support.run_unittest(CmdLineTest) if __name__ == "__main__": test_main() From nnorwitz at users.sourceforge.net Mon Oct 3 02:55:00 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:55:00 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1383,1.1384 Message-ID: <20051003005500.E100C1E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15556/Misc Modified Files: NEWS Log Message: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. Will backport. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1383 retrieving revision 1.1384 diff -u -d -r1.1383 -r1.1384 --- NEWS 2 Oct 2005 01:48:49 -0000 1.1383 +++ NEWS 3 Oct 2005 00:54:56 -0000 1.1384 @@ -12,6 +12,9 @@ Core and builtins ----------------- +- SF Bug #887946: fix segfault when redirecting stdin from a directory. + Provide a warning when a directory is passed on the command line. + - Fix segfault with invalid coding. - SF bug #772896: unknown encoding results in MemoryError. From nnorwitz at users.sourceforge.net Mon Oct 3 02:55:00 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:55:00 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules main.c,1.84,1.85 Message-ID: <20051003005500.E2B1C1E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15556/Modules Modified Files: main.c Log Message: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. 
Will backport. Index: main.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/main.c,v retrieving revision 1.84 retrieving revision 1.85 diff -u -d -r1.84 -r1.85 --- main.c 7 Oct 2004 06:46:25 -0000 1.84 +++ main.c 3 Oct 2005 00:54:57 -0000 1.85 @@ -359,6 +359,14 @@ } } } + { + /* XXX: does this work on Win/Win64? (see posix_fstat) */ + struct stat sb; + if (fstat(fileno(fp), &sb) == 0 && + S_ISDIR(sb.st_mode)) { + fprintf(stderr, "%s: warning '%s' is a directory\n", argv[0], filename); + } + } } } From nnorwitz at users.sourceforge.net Mon Oct 3 02:55:00 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 02:55:00 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python sysmodule.c,2.129,2.130 Message-ID: <20051003005500.EB2641E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15556/Python Modified Files: sysmodule.c Log Message: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. Will backport. Index: sysmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/sysmodule.c,v retrieving revision 2.129 retrieving revision 2.130 diff -u -d -r2.129 -r2.130 --- sysmodule.c 15 Sep 2005 13:00:34 -0000 2.129 +++ sysmodule.c 3 Oct 2005 00:54:57 -0000 2.130 @@ -947,6 +947,15 @@ m = Py_InitModule3("sys", sys_methods, sys_doc); sysdict = PyModule_GetDict(m); + { + /* XXX: does this work on Win/Win64? (see posix_fstat) */ + struct stat sb; + if (fstat(fileno(stdin), &sb) == 0 && + S_ISDIR(sb.st_mode)) { + Py_FatalError("<stdin> is a directory"); + } + } + /* Closing the standard FILE* if sys.std* goes aways causes problems * for embedded Python usages. 
Closing them when somebody explicitly * invokes .close() might be possible, but the FAQ promises they get From nnorwitz at users.sourceforge.net Mon Oct 3 03:03:48 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 03:03:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS, 1.1193.2.117, 1.1193.2.118 Message-ID: <20051003010348.6D0BC1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17796/Misc Modified Files: Tag: release24-maint NEWS Log Message: Backport: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1193.2.117 retrieving revision 1.1193.2.118 diff -u -d -r1.1193.2.117 -r1.1193.2.118 --- NEWS 2 Oct 2005 01:51:56 -0000 1.1193.2.117 +++ NEWS 3 Oct 2005 01:03:44 -0000 1.1193.2.118 @@ -12,6 +12,9 @@ Core and builtins ----------------- +- SF Bug #887946: fix segfault when redirecting stdin from a directory. + Provide a warning when a directory is passed on the command line. + - Fix segfault with invalid coding. Extension Modules From nnorwitz at users.sourceforge.net Mon Oct 3 03:03:48 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 03:03:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules main.c,1.84,1.84.2.1 Message-ID: <20051003010348.7BCA11E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17796/Modules Modified Files: Tag: release24-maint main.c Log Message: Backport: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. 
Index: main.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/main.c,v retrieving revision 1.84 retrieving revision 1.84.2.1 diff -u -d -r1.84 -r1.84.2.1 --- main.c 7 Oct 2004 06:46:25 -0000 1.84 +++ main.c 3 Oct 2005 01:03:45 -0000 1.84.2.1 @@ -359,6 +359,14 @@ } } } + { + /* XXX: does this work on Win/Win64? (see posix_fstat) */ + struct stat sb; + if (fstat(fileno(fp), &sb) == 0 && + S_ISDIR(sb.st_mode)) { + fprintf(stderr, "%s: warning '%s' is a directory\n", argv[0], filename); + } + } } } From nnorwitz at users.sourceforge.net Mon Oct 3 03:03:48 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 03:03:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python sysmodule.c, 2.126.2.3, 2.126.2.4 Message-ID: <20051003010348.E0E771E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17796/Python Modified Files: Tag: release24-maint sysmodule.c Log Message: Backport: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. Index: sysmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/sysmodule.c,v retrieving revision 2.126.2.3 retrieving revision 2.126.2.4 diff -u -d -r2.126.2.3 -r2.126.2.4 --- sysmodule.c 15 Sep 2005 13:00:25 -0000 2.126.2.3 +++ sysmodule.c 3 Oct 2005 01:03:45 -0000 2.126.2.4 @@ -947,6 +947,15 @@ m = Py_InitModule3("sys", sys_methods, sys_doc); sysdict = PyModule_GetDict(m); + { + /* XXX: does this work on Win/Win64? (see posix_fstat) */ + struct stat sb; + if (fstat(fileno(stdin), &sb) == 0 && + S_ISDIR(sb.st_mode)) { + Py_FatalError(" is a directory"); + } + } + /* Closing the standard FILE* if sys.std* goes aways causes problems * for embedded Python usages. 
Closing them when somebody explicitly * invokes .close() might be possible, but the FAQ promises they get From nnorwitz at users.sourceforge.net Mon Oct 3 03:03:48 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 03:03:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_cmd_line.py, NONE, 1.1.2.2 Message-ID: <20051003010348.EF9E91E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17796/Lib/test Added Files: Tag: release24-maint test_cmd_line.py Log Message: Backport: SF bug #887946, segfault if redirecting directory Also provide a warning if a directory is passed on the command line. Add minimal command line test. --- NEW FILE: test_cmd_line.py --- import test.test_support, unittest import sys import popen2 class CmdLineTest(unittest.TestCase): def start_python(self, cmd_line): outfp, infp = popen2.popen4('%s %s' % (sys.executable, cmd_line)) infp.close() data = outfp.read() outfp.close() return data def test_directories(self): self.assertTrue('is a directory' in self.start_python('.')) self.assertTrue('is a directory' in self.start_python('< .')) def verify_valid_flag(self, cmd_line): data = self.start_python(cmd_line) self.assertTrue(data.endswith('\n')) self.assertTrue('Traceback' not in data) def test_environment(self): self.verify_valid_flag('-E') def test_optimize(self): self.verify_valid_flag('-O') self.verify_valid_flag('-OO') def test_q(self): self.verify_valid_flag('-Qold') self.verify_valid_flag('-Qnew') self.verify_valid_flag('-Qwarn') self.verify_valid_flag('-Qwarnall') def test_site_flag(self): self.verify_valid_flag('-S') def test_usage(self): self.assertTrue('usage' in self.start_python('-h')) def test_version(self): version = 'Python %d.%d' % sys.version_info[:2] self.assertTrue(self.start_python('-V').startswith(version)) def test_main(): test.test_support.run_unittest(CmdLineTest) if __name__ == 
"__main__": test_main() From nnorwitz at users.sourceforge.net Mon Oct 3 05:57:12 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 05:57:12 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules ld_so_aix,2.5,2.6 Message-ID: <20051003035712.9012A1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13804/Modules Modified Files: ld_so_aix Log Message: SF Bug # 941346, AIX shared library fix Since I can't test this, I'm just adding a comment. If we get access to AIX boxes, we can test this and really resolve. Anyone from IBM want to offer help? Backport candidate I suppose. Index: ld_so_aix =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/ld_so_aix,v retrieving revision 2.5 retrieving revision 2.6 diff -u -d -r2.5 -r2.6 --- ld_so_aix 3 Sep 1997 00:45:30 -0000 2.5 +++ ld_so_aix 3 Oct 2005 03:57:09 -0000 2.6 @@ -168,6 +168,10 @@ CCOPT="-Wl,-e$entry -Wl,-bE:$expfile -Wl,-bI:$impfile -Wl,-bhalt:4" CCOPT="$CCOPT -Wl,-bM:SRE -Wl,-T512 -Wl,-H512 -lm -o $objfile" +# Note: to use dynamic libraries like libtcl8.4.so and libtk8.4.so +# you may need to replace the second CCOPT line above with the following: +# CCOPT="$CCOPT -Wl,-bM:SRE -Wl,-T512 -Wl,-H512 -brtl -bnortllib -lm -o $objfile" + CCARGS="$args" # Export list generation. From nnorwitz at users.sourceforge.net Mon Oct 3 06:48:18 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 06:48:18 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python import.c,2.244,2.245 Message-ID: <20051003044818.C20701E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23608/Python Modified Files: import.c Log Message: Fix SF bug #976608, Unhelpful error message when mtime of a module is -1 Will backport. 
Index: import.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/import.c,v retrieving revision 2.244 retrieving revision 2.245 diff -u -d -r2.244 -r2.245 --- import.c 14 Sep 2005 18:09:42 -0000 2.244 +++ import.c 3 Oct 2005 04:48:15 -0000 2.245 @@ -868,8 +868,12 @@ PyObject *m; mtime = PyOS_GetLastModificationTime(pathname, fp); - if (mtime == (time_t)(-1)) + if (mtime == (time_t)(-1)) { + PyErr_Format(PyExc_RuntimeError, + "unable to get modification time from '%s'", + pathname); return NULL; + } #if SIZEOF_TIME_T > 4 /* Python's .pyc timestamp handling presumes that the timestamp fits in 4 bytes. This will be fine until sometime in the year 2038, From nnorwitz at users.sourceforge.net Mon Oct 3 06:48:18 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 06:48:18 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1384,1.1385 Message-ID: <20051003044818.C19DA1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23608/Misc Modified Files: NEWS Log Message: Fix SF bug #976608, Unhelpful error message when mtime of a module is -1 Will backport. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1384 retrieving revision 1.1385 diff -u -d -r1.1384 -r1.1385 --- NEWS 3 Oct 2005 00:54:56 -0000 1.1384 +++ NEWS 3 Oct 2005 04:48:14 -0000 1.1385 @@ -12,6 +12,8 @@ Core and builtins ----------------- +- SF Bug #976608: fix SystemError when mtime of an imported file is -1. + - SF Bug #887946: fix segfault when redirecting stdin from a directory. Provide a warning when a directory is passed on the command line. 
From nnorwitz at users.sourceforge.net Mon Oct 3 06:50:58 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 06:50:58 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS, 1.1193.2.118, 1.1193.2.119 Message-ID: <20051003045058.B14A01E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24241/Misc Modified Files: Tag: release24-maint NEWS Log Message: Backport: Fix SF bug #976608, Unhelpful error message when mtime of a module is -1 Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1193.2.118 retrieving revision 1.1193.2.119 diff -u -d -r1.1193.2.118 -r1.1193.2.119 --- NEWS 3 Oct 2005 01:03:44 -0000 1.1193.2.118 +++ NEWS 3 Oct 2005 04:50:55 -0000 1.1193.2.119 @@ -12,6 +12,8 @@ Core and builtins ----------------- +- SF Bug #976608: fix SystemError when mtime of an imported file is -1. + - SF Bug #887946: fix segfault when redirecting stdin from a directory. Provide a warning when a directory is passed on the command line. 
From nnorwitz at users.sourceforge.net Mon Oct 3 06:50:58 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 06:50:58 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python import.c, 2.240.2.2, 2.240.2.3 Message-ID: <20051003045058.D69991E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24241/Python Modified Files: Tag: release24-maint import.c Log Message: Backport: Fix SF bug #976608, Unhelpful error message when mtime of a module is -1 Index: import.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/import.c,v retrieving revision 2.240.2.2 retrieving revision 2.240.2.3 diff -u -d -r2.240.2.2 -r2.240.2.3 --- import.c 14 Sep 2005 18:15:03 -0000 2.240.2.2 +++ import.c 3 Oct 2005 04:50:55 -0000 2.240.2.3 @@ -867,8 +867,12 @@ PyObject *m; mtime = PyOS_GetLastModificationTime(pathname, fp); - if (mtime == (time_t)(-1)) + if (mtime == (time_t)(-1)) { + PyErr_Format(PyExc_RuntimeError, + "unable to get modification time from '%s'", + pathname); return NULL; + } #if SIZEOF_TIME_T > 4 /* Python's .pyc timestamp handling presumes that the timestamp fits in 4 bytes. This will be fine until sometime in the year 2038, From nnorwitz at users.sourceforge.net Mon Oct 3 07:13:49 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 07:13:49 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libos.tex,1.167,1.168 Message-ID: <20051003051349.E69591E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28640/lib Modified Files: libos.tex Log Message: Fix SF bug #991735, os.access reports true for read-only directories. 
Will backport Index: libos.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libos.tex,v retrieving revision 1.167 retrieving revision 1.168 diff -u -d -r1.167 -r1.168 --- libos.tex 31 Aug 2005 13:50:17 -0000 1.167 +++ libos.tex 3 Oct 2005 05:13:46 -0000 1.168 @@ -692,6 +692,11 @@ open a file before actually doing so using \function{open()} creates a security hole, because the user might exploit the short time interval between checking and opening the file to manipulate it.} + +\note{I/O operations may fail even when \function{access()} +indicates that they would succeed, particularly for operations +on network filesystems which may have permissions semantics +beyond the usual \POSIX{} permission-bit model.} \end{funcdesc} \begin{datadesc}{F_OK} From nnorwitz at users.sourceforge.net Mon Oct 3 07:14:28 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 07:14:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libos.tex, 1.146.2.10, 1.146.2.11 Message-ID: <20051003051428.ACF5C1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28694/Doc/lib Modified Files: Tag: release24-maint libos.tex Log Message: Fix SF bug #991735, os.access reports true for read-only directories. 
Will backport Index: libos.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libos.tex,v retrieving revision 1.146.2.10 retrieving revision 1.146.2.11 diff -u -d -r1.146.2.10 -r1.146.2.11 --- libos.tex 18 Jul 2005 08:17:08 -0000 1.146.2.10 +++ libos.tex 3 Oct 2005 05:14:25 -0000 1.146.2.11 @@ -672,6 +672,11 @@ open a file before actually doing so using \function{open()} creates a security hole, because the user might exploit the short time interval between checking and opening the file to manipulate it.} + +\note{I/O operations may fail even when \function{access()} +indicates that they would succeed, particularly for operations +on network filesystems which may have permissions semantics +beyond the usual \POSIX{} permission-bit model.} \end{funcdesc} \begin{datadesc}{F_OK} From nnorwitz at users.sourceforge.net Mon Oct 3 07:47:41 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 07:47:41 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libos.tex,1.168,1.169 Message-ID: <20051003054741.B17FB1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1602/lib Modified Files: libos.tex Log Message: SF Bug #1008310, os.major() os.minor() example and description change. Will backport. Index: libos.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libos.tex,v retrieving revision 1.168 retrieving revision 1.169 diff -u -d -r1.168 -r1.169 --- libos.tex 3 Oct 2005 05:13:46 -0000 1.168 +++ libos.tex 3 Oct 2005 05:47:38 -0000 1.169 @@ -843,12 +843,14 @@ \end{funcdesc} \begin{funcdesc}{major}{device} -Extracts a device major number from a raw device number. +Extracts the device major number from a raw device number (usually +the \member{st_dev} or \member{st_rdev} field from \ctype{stat}). 
\versionadded{2.3} \end{funcdesc} \begin{funcdesc}{minor}{device} -Extracts a device minor number from a raw device number. +Extracts the device minor number from a raw device number (usually +the \member{st_dev} or \member{st_rdev} field from \ctype{stat}). \versionadded{2.3} \end{funcdesc} From nnorwitz at users.sourceforge.net Mon Oct 3 07:48:22 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 07:48:22 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libos.tex, 1.146.2.11, 1.146.2.12 Message-ID: <20051003054822.2C5841E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1724/Doc/lib Modified Files: Tag: release24-maint libos.tex Log Message: Backport: SF Bug #1008310, os.major() os.minor() example and description change. Index: libos.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libos.tex,v retrieving revision 1.146.2.11 retrieving revision 1.146.2.12 diff -u -d -r1.146.2.11 -r1.146.2.12 --- libos.tex 3 Oct 2005 05:14:25 -0000 1.146.2.11 +++ libos.tex 3 Oct 2005 05:48:18 -0000 1.146.2.12 @@ -823,12 +823,14 @@ \end{funcdesc} \begin{funcdesc}{major}{device} -Extracts a device major number from a raw device number. +Extracts the device major number from a raw device number (usually +the \member{st_dev} or \member{st_rdev} field from \ctype{stat}). \versionadded{2.3} \end{funcdesc} \begin{funcdesc}{minor}{device} -Extracts a device minor number from a raw device number. +Extracts the device minor number from a raw device number (usually +the \member{st_dev} or \member{st_rdev} field from \ctype{stat}). 
\versionadded{2.3} \end{funcdesc} From nnorwitz at users.sourceforge.net Mon Oct 3 09:46:37 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 3 Oct 2005 09:46:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc valgrind-python.supp, 1.1, 1.2 Message-ID: <20051003074637.998B41E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26526/Misc Modified Files: valgrind-python.supp Log Message: suppress more errors on my box, most are gentoo specific. Py_ADDRESS_IN_RANGE is x86_64 specific and the readline memory leaks should be generally applicable Index: valgrind-python.supp =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/valgrind-python.supp,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- valgrind-python.supp 6 Jun 2004 19:58:40 -0000 1.1 +++ valgrind-python.supp 3 Oct 2005 07:46:34 -0000 1.2 @@ -29,6 +29,12 @@ } { + ADDRESS_IN_RANGE/Invalid read of size 8 (x86_64) + Memcheck:Value8 + fun:Py_ADDRESS_IN_RANGE +} + +{ ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value Memcheck:Cond fun:Py_ADDRESS_IN_RANGE @@ -84,6 +90,118 @@ } +{ + Avoid problem in libc on gentoo + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Addr8 + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Addr8 + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + 
obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problems w/readline doing a putenv and leaking on exit + Memcheck:Leak + fun:malloc + fun:xmalloc + fun:sh_set_lines_and_columns + fun:_rl_get_screen_size + fun:_rl_init_terminal_io + obj:/lib/libreadline.so.4.3 + fun:rl_initialize + fun:setup_readline + fun:initreadline + fun:_PyImport_LoadDynamicModule + fun:load_module + fun:import_submodule + fun:load_next + fun:import_module_ex + fun:PyImport_ImportModuleEx +} + +{ + Mysterious leak that seems to deal w/pthreads + Memcheck:Leak + fun:calloc + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_allocate_tls + fun:__pthread_initialize_minimal +} + +{ + Mysterious leak that seems to deal w/pthreads + Memcheck:Leak + fun:memalign + obj:/lib/ld-2.3.4.so + fun:_dl_allocate_tls + fun:__pthread_initialize_minimal +} + ### ### These occur from somewhere within the SSL, when running ### test_socket_sll. They are too general to leave on by default. 
From birkenfeld at users.sourceforge.net Mon Oct 3 16:16:47 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Mon, 3 Oct 2005 16:16:47 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1385,1.1386 Message-ID: <20051003141647.D44371E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11970/Misc Modified Files: NEWS Log Message: Patch #754022: Greatly enhanced webbrowser.py. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1385 retrieving revision 1.1386 diff -u -d -r1.1385 -r1.1386 --- NEWS 3 Oct 2005 04:48:14 -0000 1.1385 +++ NEWS 3 Oct 2005 14:16:44 -0000 1.1386 @@ -254,6 +254,8 @@ Library ------- +- Patch #754022: Greatly enhanced webbrowser.py (by Oleg Broytmann). + - Bug #729103: pydoc.py: Fix docother() method to accept additional "parent" argument. From birkenfeld at users.sourceforge.net Mon Oct 3 16:16:48 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Mon, 3 Oct 2005 16:16:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libwebbrowser.tex, 1.10, 1.11 Message-ID: <20051003141648.14A361E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11970/Doc/lib Modified Files: libwebbrowser.tex Log Message: Patch #754022: Greatly enhanced webbrowser.py. Index: libwebbrowser.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libwebbrowser.tex,v retrieving revision 1.10 retrieving revision 1.11 diff -u -d -r1.10 -r1.11 --- libwebbrowser.tex 19 Jul 2001 03:49:33 -0000 1.10 +++ libwebbrowser.tex 3 Oct 2005 14:16:44 -0000 1.11 @@ -6,9 +6,8 @@ \moduleauthor{Fred L. Drake, Jr.}{fdrake at acm.org} \sectionauthor{Fred L. 
Drake, Jr.}{fdrake at acm.org} -The \module{webbrowser} module provides a very high-level interface to -allow displaying Web-based documents to users. The controller objects -are easy to use and are platform-independent. Under most +The \module{webbrowser} module provides a high-level interface to +allow displaying Web-based documents to users. Under most circumstances, simply calling the \function{open()} function from this module will do the right thing. @@ -17,19 +16,26 @@ display isn't available. If text-mode browsers are used, the calling process will block until the user exits the browser. -Under \UNIX, if the environment variable \envvar{BROWSER} exists, it +If the environment variable \envvar{BROWSER} exists, it is interpreted to override the platform default list of browsers, as a -colon-separated list of browsers to try in order. When the value of +os.pathsep-separated list of browsers to try in order. When the value of a list part contains the string \code{\%s}, then it is interpreted as a literal browser command line to be used with the argument URL substituted for the \code{\%s}; if the part does not contain \code{\%s}, it is simply interpreted as the name of the browser to launch. -For non-\UNIX{} platforms, or when X11 browsers are available on +For non-\UNIX{} platforms, or when a remote browser is available on \UNIX, the controlling process will not wait for the user to finish -with the browser, but allow the browser to maintain its own window on -the display. +with the browser, but allow the remote browser to maintain its own +windows on the display. If remote browsers are not available on \UNIX, +the controlling process will launch a new browser and wait. + +The script \program{webbrowser} can be used as a command-line interface +for the module. It accepts an URL as the argument. 
It accepts the following +optional parameters: \programopt{-n} opens the URL in a new browser window, +if possible; \programopt{-t} opens the URL in a new browser page ("tab"). The +options are, naturally, mutually exclusive. The following exception is defined: @@ -40,15 +46,24 @@ The following functions are defined: \begin{funcdesc}{open}{url\optional{, new=0}\optional{, autoraise=1}} - Display \var{url} using the default browser. If \var{new} is true, - a new browser window is opened if possible. If \var{autoraise} is + Display \var{url} using the default browser. If \var{new} is 0, the + \var{url} is opened in the same browser window. If \var{new} is 1, + a new browser window is opened if possible. If \var{new} is 2, + a new browser page ("tab") is opened if possible. If \var{autoraise} is true, the window is raised if possible (note that under many window managers this will occur regardless of the setting of this variable). + \end{funcdesc} -\begin{funcdesc}{open_new}{url} +\begin{funcdesc}{open_new_win}{url} Open \var{url} in a new window of the default browser, if possible, - otherwise, open \var{url} in the only browser window. + otherwise, open \var{url} in the only browser window. Alias + \function{open_new}. +\end{funcdesc} + +\begin{funcdesc}{open_new_tab}{url} + Open \var{url} in a new page ("tab") of the default browser, if possible, + otherwise equivalent to \function{open_new_win}. \end{funcdesc} \begin{funcdesc}{get}{\optional{name}} @@ -67,7 +82,7 @@ This entry point is only useful if you plan to either set the \envvar{BROWSER} variable or call \function{get} with a nonempty - argument matching the name of a handler you declare. + argument matching the name of a handler you declare. \end{funcdesc} A number of browser types are predefined. This table gives the type @@ -76,16 +91,24 @@ in this module. 
\begin{tableiii}{l|l|c}{code}{Type Name}{Class Name}{Notes} - \lineiii{'mozilla'}{\class{Netscape('mozilla')}}{} - \lineiii{'netscape'}{\class{Netscape('netscape')}}{} - \lineiii{'mosaic'}{\class{GenericBrowser('mosaic \%s \&')}}{} + \lineiii{'mozilla'}{\class{Mozilla('mozilla')}}{} + \lineiii{'firefox'}{\class{Mozilla('mozilla')}}{} + \lineiii{'netscape'}{\class{Mozilla('netscape')}}{} + \lineiii{'galeon'}{\class{Galeon('galeon')}}{} + \lineiii{'epiphany'}{\class{Galeon('epiphany')}}{} + \lineiii{'skipstone'}{\class{GenericBrowser('skipstone \%s \&')}}{} + \lineiii{'konqueror'}{\class{Konqueror()}}{(1)} \lineiii{'kfm'}{\class{Konqueror()}}{(1)} + \lineiii{'mosaic'}{\class{GenericBrowser('mosaic \%s \&')}}{} + \lineiii{'opera'}{\class{Opera()}}{} \lineiii{'grail'}{\class{Grail()}}{} \lineiii{'links'}{\class{GenericBrowser('links \%s')}}{} + \lineiii{'elinks'}{\class{Elinks('elinks')}}{} \lineiii{'lynx'}{\class{GenericBrowser('lynx \%s')}}{} \lineiii{'w3m'}{\class{GenericBrowser('w3m \%s')}}{} \lineiii{'windows-default'}{\class{WindowsDefault}}{(2)} \lineiii{'internet-config'}{\class{InternetConfig}}{(3)} + \lineiii{'macosx'}{\class{MacOSX('default')}}{(4)} \end{tableiii} \noindent @@ -101,13 +124,15 @@ implementation selects the best strategy for running Konqueror. \item[(2)] -Only on Windows platforms; requires the common -extension modules \module{win32api} and \module{win32con}. +Only on Windows platforms. \item[(3)] Only on MacOS platforms; requires the standard MacPython \module{ic} module, described in the \citetitle[../mac/module-ic.html]{Macintosh Library Modules} manual. + +\item[(4)] +Only on MacOS X platform. \end{description} @@ -117,12 +142,18 @@ module-level convenience functions: \begin{funcdesc}{open}{url\optional{, new}} - Display \var{url} using the browser handled by this controller. If - \var{new} is true, a new browser window is opened if possible. + Display \var{url} using the browser handled by this controller. 
+ If \var{new} is 1, a new browser window is opened if possible. + If \var{new} is 2, a new browser page ("tab") is opened if possible. \end{funcdesc} -\begin{funcdesc}{open_new}{url} +\begin{funcdesc}{open_new_win}{url} Open \var{url} in a new window of the browser handled by this controller, if possible, otherwise, open \var{url} in the only - browser window. + browser window. Alias \function{open_new}. +\end{funcdesc} + +\begin{funcdesc}{open_new_tab}{url} + Open \var{url} in a new page ("tab") of the browser handled by this + controller, if possible, otherwise equivalent to \function{open_new_win}. \end{funcdesc} From birkenfeld at users.sourceforge.net Mon Oct 3 16:16:48 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Mon, 3 Oct 2005 16:16:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib webbrowser.py,1.37,1.38 Message-ID: <20051003141648.73D231E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11970/Lib Modified Files: webbrowser.py Log Message: Patch #754022: Greatly enhanced webbrowser.py. Index: webbrowser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/webbrowser.py,v retrieving revision 1.37 retrieving revision 1.38 diff -u -d -r1.37 -r1.38 --- webbrowser.py 10 Jul 2004 22:07:02 -0000 1.37 +++ webbrowser.py 3 Oct 2005 14:16:44 -0000 1.38 @@ -1,9 +1,11 @@ +#! 
/usr/bin/env python """Interfaces for launching and remotely controlling Web browsers.""" import os import sys +import stat -__all__ = ["Error", "open", "get", "register"] +__all__ = ["Error", "open", "open_new", "open_new_tab", "get", "register"] class Error(Exception): pass @@ -11,9 +13,13 @@ _browsers = {} # Dictionary of available browser controllers _tryorder = [] # Preference order of available browsers -def register(name, klass, instance=None): +def register(name, klass, instance=None, update_tryorder=1): """Register a browser connector and, optionally, connection.""" _browsers[name.lower()] = [klass, instance] + if update_tryorder > 0: + _tryorder.append(name) + elif update_tryorder < 0: + _tryorder.insert(0, name) def get(using=None): """Return a browser launcher instance appropriate for the environment.""" @@ -26,27 +32,36 @@ # User gave us a command line, don't mess with it. return GenericBrowser(browser) else: - # User gave us a browser name. + # User gave us a browser name or path. try: command = _browsers[browser.lower()] except KeyError: command = _synthesize(browser) - if command[1] is None: - return command[0]() - else: + if command[1] is not None: return command[1] + elif command[0] is not None: + return command[0]() raise Error("could not locate runnable browser") # Please note: the following definition hides a builtin function. +# It is recommended one does "import webbrowser" and uses webbrowser.open(url) +# instead of "from webbrowser import *". def open(url, new=0, autoraise=1): - get().open(url, new, autoraise) + for name in _tryorder: + browser = get(name) + if browser.open(url, new, autoraise): + return True + return False def open_new(url): - get().open(url, 1) + return open(url, 1) +def open_new_tab(url): + return open(url, 2) -def _synthesize(browser): + +def _synthesize(browser, update_tryorder=1): """Attempt to synthesize a controller base on existing controllers. 
This is useful to create a controller when a user specifies a path to @@ -58,9 +73,10 @@ executable for the requested browser, return [None, None]. """ - if not os.path.exists(browser): + cmd = browser.split()[0] + if not _iscommand(cmd): return [None, None] - name = os.path.basename(browser) + name = os.path.basename(cmd) try: command = _browsers[name.lower()] except KeyError: @@ -72,132 +88,199 @@ controller = copy.copy(controller) controller.name = browser controller.basename = os.path.basename(browser) - register(browser, None, controller) + register(browser, None, controller, update_tryorder) return [None, controller] return [None, None] +if sys.platform[:3] == "win": + def _isexecutable(cmd): + cmd = cmd.lower() + if os.path.isfile(cmd) and (cmd.endswith(".exe") or + cmd.endswith(".bat")): + return True + for ext in ".exe", ".bat": + if os.path.isfile(cmd + ext): + return True + return False +else: + def _isexecutable(cmd): + if os.path.isfile(cmd): + mode = os.stat(cmd)[stat.ST_MODE] + if mode & stat.S_IXUSR or mode & stat.S_IXGRP or mode & stat.S_IXOTH: + return True + return False + def _iscommand(cmd): - """Return True if cmd can be found on the executable search path.""" + """Return True if cmd is executable or can be found on the executable + search path.""" + if _isexecutable(cmd): + return True path = os.environ.get("PATH") if not path: return False for d in path.split(os.pathsep): exe = os.path.join(d, cmd) - if os.path.isfile(exe): + if _isexecutable(exe): return True return False -PROCESS_CREATION_DELAY = 4 +# General parent classes + +class BaseBrowser(object): + """Parent class for all browsers.""" + def __init__(self, name=""): + self.name = name + + def open_new(self, url): + return self.open(url, 1) + + def open_new_tab(self, url): + return self.open(url, 2) + + +class GenericBrowser(BaseBrowser): + """Class for all browsers started with a command + and without remote functionality.""" -class GenericBrowser: def __init__(self, cmd): self.name, 
self.args = cmd.split(None, 1) - self.basename = os.path.basename(self.name) def open(self, url, new=0, autoraise=1): assert "'" not in url command = "%s %s" % (self.name, self.args) - os.system(command % url) + rc = os.system(command % url) + return not rc - def open_new(self, url): - self.open(url) +class UnixBrowser(BaseBrowser): + """Parent class for all Unix browsers with remote functionality.""" -class Netscape: - "Launcher class for Netscape browsers." - def __init__(self, name): - self.name = name - self.basename = os.path.basename(name) + raise_opts = None - def _remote(self, action, autoraise): - raise_opt = ("-noraise", "-raise")[autoraise] - cmd = "%s %s -remote '%s' >/dev/null 2>&1" % (self.name, - raise_opt, - action) + remote_cmd = '' + remote_action = None + remote_action_newwin = None + remote_action_newtab = None + remote_background = False + + def _remote(self, url, action, autoraise): + autoraise = int(bool(autoraise)) # always 0/1 + raise_opt = self.raise_opts and self.raise_opts[autoraise] or '' + cmd = "%s %s %s '%s' >/dev/null 2>&1" % (self.name, raise_opt, + self.remote_cmd, action) + if remote_background: + cmd += ' &' rc = os.system(cmd) if rc: - import time - os.system("%s &" % self.name) - time.sleep(PROCESS_CREATION_DELAY) - rc = os.system(cmd) + # bad return status, try again with simpler command + rc = os.system("%s %s" % (self.name, url)) return not rc def open(self, url, new=0, autoraise=1): - if new: - self._remote("openURL(%s, new-window)"%url, autoraise) + assert "'" not in url + if new == 0: + action = self.remote_action + elif new == 1: + action = self.remote_action_newwin + elif new == 2: + if self.remote_action_newtab is None: + action = self.remote_action_newwin + else: + action = self.remote_action_newtab else: - self._remote("openURL(%s)" % url, autoraise) + raise Error("Bad 'new' parameter to open(); expected 0, 1, or 2, got %s" % new) + return self._remote(url, action % url, autoraise) - def open_new(self, url): - 
self.open(url, 1) +class Mozilla(UnixBrowser): + """Launcher class for Mozilla/Netscape browsers.""" -class Galeon: - """Launcher class for Galeon browsers.""" - def __init__(self, name): - self.name = name - self.basename = os.path.basename(name) + raise_opts = ("-noraise", "-raise") - def _remote(self, action, autoraise): - raise_opt = ("--noraise", "")[autoraise] - cmd = "%s %s %s >/dev/null 2>&1" % (self.name, raise_opt, action) - rc = os.system(cmd) - if rc: - import time - os.system("%s >/dev/null 2>&1 &" % self.name) - time.sleep(PROCESS_CREATION_DELAY) - rc = os.system(cmd) - return not rc + remote_cmd = '-remote' + remote_action = "openURL(%s)" + remote_action_newwin = "openURL(%s,new-window)" + remote_action_newtab = "openURL(%s,new-tab)" - def open(self, url, new=0, autoraise=1): - if new: - self._remote("-w '%s'" % url, autoraise) - else: - self._remote("-n '%s'" % url, autoraise) +Netscape = Mozilla - def open_new(self, url): - self.open(url, 1) +class Galeon(UnixBrowser): + """Launcher class for Galeon/Epiphany browsers.""" -class Konqueror: + raise_opts = ("-noraise", "") + remote_action = "-n '%s'" + remote_action_newwin = "-w '%s'" + + remote_background = True + + +class Konqueror(BaseBrowser): """Controller for the KDE File Manager (kfm, or Konqueror). See http://developer.kde.org/documentation/other/kfmclient.html for more information on the Konqueror remote-control interface. """ - def __init__(self): - if _iscommand("konqueror"): - self.name = self.basename = "konqueror" - else: - self.name = self.basename = "kfm" - def _remote(self, action): + def _remote(self, url, action): + # kfmclient is the new KDE way of opening URLs. cmd = "kfmclient %s >/dev/null 2>&1" % action rc = os.system(cmd) + # Fall back to other variants. 
if rc: - import time - if self.basename == "konqueror": - os.system(self.name + " --silent &") - else: - os.system(self.name + " -d &") - time.sleep(PROCESS_CREATION_DELAY) - rc = os.system(cmd) + if _iscommand("konqueror"): + rc = os.system(self.name + " --silent '%s' &" % url) + elif _iscommand("kfm"): + rc = os.system(self.name + " -d '%s'" % url) return not rc - def open(self, url, new=1, autoraise=1): + def open(self, url, new=0, autoraise=1): # XXX Currently I know no way to prevent KFM from # opening a new win. assert "'" not in url - self._remote("openURL '%s'" % url) + if new == 2: + action = "newTab '%s'" % url + else: + action = "openURL '%s'" % url + ok = self._remote(url, action) + return ok - open_new = open +class Opera(UnixBrowser): + "Launcher class for Opera browser." -class Grail: + raise_opts = ("", "-raise") + + remote_cmd = '-remote' + remote_action = "openURL(%s)" + remote_action_newwin = "openURL(%s,new-window)" + remote_action_newtab = "openURL(%s,new-page)" + + +class Elinks(UnixBrowser): + "Launcher class for Elinks browsers." + + remote_cmd = '-remote' + remote_action = "openURL(%s)" + remote_action_newwin = "openURL(%s,new-window)" + remote_action_newtab = "openURL(%s,new-tab)" + + def _remote(self, url, action, autoraise): + # elinks doesn't like its stdout to be redirected - + # it uses redirected stdout as a signal to do -dump + cmd = "%s %s '%s' 2>/dev/null" % (self.name, + self.remote_cmd, action) + rc = os.system(cmd) + if rc: + rc = os.system("%s %s" % (self.name, url)) + return not rc + + +class Grail(BaseBrowser): # There should be a way to maintain a connection to Grail, but the # Grail remote control protocol doesn't really allow that at this # point. It probably neverwill! 
@@ -237,93 +320,97 @@ def open(self, url, new=0, autoraise=1): if new: - self._remote("LOADNEW " + url) + ok = self._remote("LOADNEW " + url) else: - self._remote("LOAD " + url) - - def open_new(self, url): - self.open(url, 1) - - -class WindowsDefault: - def open(self, url, new=0, autoraise=1): - os.startfile(url) + ok = self._remote("LOAD " + url) + return ok - def open_new(self, url): - self.open(url) # # Platform support for Unix # -# This is the right test because all these Unix browsers require either -# a console terminal of an X display to run. Note that we cannot split -# the TERM and DISPLAY cases, because we might be running Python from inside -# an xterm. -if os.environ.get("TERM") or os.environ.get("DISPLAY"): - _tryorder = ["links", "lynx", "w3m"] - - # Easy cases first -- register console browsers if we have them. - if os.environ.get("TERM"): - # The Links browser - if _iscommand("links"): - register("links", None, GenericBrowser("links '%s'")) - # The Lynx browser - if _iscommand("lynx"): - register("lynx", None, GenericBrowser("lynx '%s'")) - # The w3m browser - if _iscommand("w3m"): - register("w3m", None, GenericBrowser("w3m '%s'")) +# These are the right tests because all these Unix browsers require either +# a console terminal or an X display to run. 
- # X browsers have more in the way of options - if os.environ.get("DISPLAY"): - _tryorder = ["galeon", "skipstone", - "mozilla-firefox", "mozilla-firebird", "mozilla", "netscape", - "kfm", "grail"] + _tryorder +# Prefer X browsers if present +if os.environ.get("DISPLAY"): - # First, the Netscape series - for browser in ("mozilla-firefox", "mozilla-firebird", - "mozilla", "netscape"): - if _iscommand(browser): - register(browser, None, Netscape(browser)) + # First, the Mozilla/Netscape browsers + for browser in ("mozilla-firefox", "firefox", + "mozilla-firebird", "firebird", + "mozilla", "netscape"): + if _iscommand(browser): + register(browser, None, Mozilla(browser)) - # Next, Mosaic -- old but still in use. - if _iscommand("mosaic"): - register("mosaic", None, GenericBrowser( - "mosaic '%s' >/dev/null &")) + # The default Gnome browser + if _iscommand("gconftool-2"): + # get the web browser string from gconftool + gc = 'gconftool-2 -g /desktop/gnome/url-handlers/http/command' + out = os.popen(gc) + commd = out.read().strip() + retncode = out.close() - # Gnome's Galeon - if _iscommand("galeon"): - register("galeon", None, Galeon("galeon")) + # if successful, register it + if retncode == None and len(commd) != 0: + register("gnome", None, GenericBrowser( + commd + " '%s' >/dev/null &")) - # Skipstone, another Gtk/Mozilla based browser - if _iscommand("skipstone"): - register("skipstone", None, GenericBrowser( - "skipstone '%s' >/dev/null &")) + # Konqueror/kfm, the KDE browser. + if _iscommand("kfm") or _iscommand("konqueror"): + register("kfm", Konqueror, Konqueror()) - # Konqueror/kfm, the KDE browser. - if _iscommand("kfm") or _iscommand("konqueror"): - register("kfm", Konqueror, Konqueror()) + # Gnome's Galeon and Epiphany + for browser in ("galeon", "epiphany"): + if _iscommand(browser): + register(browser, None, Galeon(browser)) - # Grail, the Python browser. 
- if _iscommand("grail"): - register("grail", Grail, None) + # Skipstone, another Gtk/Mozilla based browser + if _iscommand("skipstone"): + register("skipstone", None, GenericBrowser("skipstone '%s' &")) + # Opera, quite popular + if _iscommand("opera"): + register("opera", None, Opera("opera")) -class InternetConfig: - def open(self, url, new=0, autoraise=1): - ic.launchurl(url) + # Next, Mosaic -- old but still in use. + if _iscommand("mosaic"): + register("mosaic", None, GenericBrowser("mosaic '%s' &")) - def open_new(self, url): - self.open(url) + # Grail, the Python browser. Does anybody still use it? + if _iscommand("grail"): + register("grail", Grail, None) +# Also try console browsers +if os.environ.get("TERM"): + # The Links/elinks browsers + if _iscommand("links"): + register("links", None, GenericBrowser("links '%s'")) + if _iscommand("elinks"): + register("elinks", None, Elinks("elinks")) + # The Lynx browser , + if _iscommand("lynx"): + register("lynx", None, GenericBrowser("lynx '%s'")) + # The w3m browser + if _iscommand("w3m"): + register("w3m", None, GenericBrowser("w3m '%s'")) # # Platform support for Windows # if sys.platform[:3] == "win": - _tryorder = ["netscape", "windows-default"] + class WindowsDefault(BaseBrowser): + def open(self, url, new=0, autoraise=1): + os.startfile(url) + return True # Oh, my... + + _tryorder = [] + _browsers = {} + # Prefer mozilla/netscape/opera if present + for browser in ("firefox", "firebird", "mozilla", "netscape", "opera"): + if _iscommand(browser): + register(browser, None, GenericBrowser(browser + ' %s')) register("windows-default", WindowsDefault) # @@ -335,36 +422,112 @@ except ImportError: pass else: - # internet-config is the only supported controller on MacOS, - # so don't mess with the default! 
- _tryorder = ["internet-config"] - register("internet-config", InternetConfig) + class InternetConfig(BaseBrowser): + def open(self, url, new=0, autoraise=1): + ic.launchurl(url) + return True # Any way to get status? + + register("internet-config", InternetConfig, update_tryorder=-1) + +if sys.platform == 'darwin': + # Adapted from patch submitted to SourceForge by Steven J. Burr + class MacOSX(BaseBrowser): + """Launcher class for Aqua browsers on Mac OS X + + Optionally specify a browser name on instantiation. Note that this + will not work for Aqua browsers if the user has moved the application + package after installation. + + If no browser is specified, the default browser, as specified in the + Internet System Preferences panel, will be used. + """ + def __init__(self, name): + self.name = name + + def open(self, url, new=0, autoraise=1): + assert "'" not in url + # new must be 0 or 1 + new = int(bool(new)) + if self.name == "default": + # User called open, open_new or get without a browser parameter + script = _safequote('open location "%s"', url) # opens in default browser + else: + # User called get and chose a browser + if self.name == "OmniWeb": + toWindow = "" + else: + # Include toWindow parameter of OpenURL command for browsers + # that support it. 
0 == new window; -1 == existing + toWindow = "toWindow %d" % (new - 1) + cmd = _safequote('OpenURL "%s"', url) + script = '''tell application "%s" + activate + %s %s + end tell''' % (self.name, cmd, toWindow) + # Open pipe to AppleScript through osascript command + osapipe = os.popen("osascript", "w") + if osapipe is None: + return False + # Write script to osascript's stdin + osapipe.write(script) + rc = osapipe.close() + return not rc + + # Don't clear _tryorder or _browsers since OS X can use above Unix support + # (but we prefer using the OS X specific stuff) + register("MacOSX", None, MacOSX('default'), -1) + # # Platform support for OS/2 # -if sys.platform[:3] == "os2" and _iscommand("netscape.exe"): - _tryorder = ["os2netscape"] +if sys.platform[:3] == "os2" and _iscommand("netscape"): + _tryorder = [] + _browsers = {} register("os2netscape", None, - GenericBrowser("start netscape.exe %s")) + GenericBrowser("start netscape %s"), -1) + # OK, now that we know what the default preference orders for each # platform are, allow user to override them with the BROWSER variable. -# if "BROWSER" in os.environ: - # It's the user's responsibility to register handlers for any unknown - # browser referenced by this value, before calling open(). 
- _tryorder = os.environ["BROWSER"].split(os.pathsep) + _userchoices = os.environ["BROWSER"].split(os.pathsep) + _userchoices.reverse() -for cmd in _tryorder: - if not cmd.lower() in _browsers: - if _iscommand(cmd.lower()): - register(cmd.lower(), None, GenericBrowser( - "%s '%%s'" % cmd.lower())) -cmd = None # to make del work if _tryorder was empty -del cmd + # Treat choices in same way as if passed into get() but do register + # and prepend to _tryorder + for cmdline in _userchoices: + if cmdline != '': + _synthesize(cmdline, -1) + cmdline = None # to make del work if _userchoices was empty + del cmdline + del _userchoices -_tryorder = filter(lambda x: x.lower() in _browsers - or x.find("%s") > -1, _tryorder) # what to do if _tryorder is now empty? + + +def main(): + import getopt + usage = """Usage: %s [-n | -t] url + -n: open new window + -t: open new tab""" % sys.argv[0] + try: + opts, args = getopt.getopt(sys.argv[1:], 'ntd') + except getopt.error, msg: + print >>sys.stderr, msg + print >>sys.stderr, usage + sys.exit(1) + new_win = 0 + for o, a in opts: + if o == '-n': new_win = 1 + elif o == '-t': new_win = 2 + if len(args) <> 1: + print >>sys.stderr, usage + sys.exit(1) + + url = args[0] + open(url, new_win) + +if __name__ == "__main__": + main() From fdrake at users.sourceforge.net Mon Oct 3 16:25:44 2005 From: fdrake at users.sourceforge.net (fdrake@users.sourceforge.net) Date: Mon, 3 Oct 2005 16:25:44 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libstdtypes.tex, 1.185, 1.186 Message-ID: <20051003142544.A759D1E4010@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14325/lib Modified Files: libstdtypes.tex Log Message: fix link to subsection (SF bug #1311674) Index: libstdtypes.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libstdtypes.tex,v retrieving revision 1.185 retrieving revision 1.186 
diff -u -d -r1.185 -r1.186 --- libstdtypes.tex 18 Aug 2005 21:27:11 -0000 1.185 +++ libstdtypes.tex 3 Oct 2005 14:25:40 -0000 1.186 @@ -1295,8 +1295,10 @@ \module{sets} module. \begin{seealso} - \seemodule[comparison-to-builtin-set]{sets}{Differences between - the \module{sets} module and the built-in set types.} + \seelink{comparison-to-builtin-set.html} + {Comparison to the built-in set types} + {Differences between the \module{sets} module and the + built-in set types.} \end{seealso} From fdrake at users.sourceforge.net Mon Oct 3 16:27:08 2005 From: fdrake at users.sourceforge.net (fdrake@users.sourceforge.net) Date: Mon, 3 Oct 2005 16:27:08 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libstdtypes.tex, 1.170.2.13, 1.170.2.14 Message-ID: <20051003142708.43E941E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14629/lib Modified Files: Tag: release24-maint libstdtypes.tex Log Message: fix link to subsection (SF bug #1311674) Index: libstdtypes.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libstdtypes.tex,v retrieving revision 1.170.2.13 retrieving revision 1.170.2.14 diff -u -d -r1.170.2.13 -r1.170.2.14 --- libstdtypes.tex 4 Jul 2005 14:18:20 -0000 1.170.2.13 +++ libstdtypes.tex 3 Oct 2005 14:27:04 -0000 1.170.2.14 @@ -1295,8 +1295,10 @@ \module{sets} module. 
\begin{seealso} - \seemodule[comparison-to-builtin-set]{sets}{Differences between - the \module{sets} module and the built-in set types.} + \seelink{comparison-to-builtin-set.html} + {Comparison to the built-in set types} + {Differences between the \module{sets} module and the + built-in set types.} \end{seealso} From rhettinger at users.sourceforge.net Mon Oct 3 18:39:56 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Mon, 3 Oct 2005 18:39:56 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ref ref6.tex,1.77,1.78 Message-ID: <20051003163956.8AB021E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ref In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14427 Modified Files: ref6.tex Log Message: Correct docs for empty raise when no exception is active. Index: ref6.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref6.tex,v retrieving revision 1.77 retrieving revision 1.78 diff -u -d -r1.77 -r1.78 --- ref6.tex 7 Sep 2005 05:17:07 -0000 1.77 +++ ref6.tex 3 Oct 2005 16:39:51 -0000 1.78 @@ -524,8 +524,9 @@ If no expressions are present, \keyword{raise} re-raises the last exception that was active in the current scope. If no exception is -active in the current scope, a \exception{Queue.Empty} exception is -raised indicating this error. +active in the current scope, a \exception{TypeError} exception is +raised indicating that this is an error (if running under IDLE, a +\exception{Queue.Empty} exception is raised instead}. 
\index{exception} \indexii{raising}{exception} From rhettinger at users.sourceforge.net Mon Oct 3 18:40:39 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Mon, 3 Oct 2005 18:40:39 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ref ref6.tex, 1.73.2.4, 1.73.2.5 Message-ID: <20051003164039.923BE1E4119@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ref In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14619 Modified Files: Tag: release24-maint ref6.tex Log Message: Correct docs for empty raise when no exception is active. Index: ref6.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref6.tex,v retrieving revision 1.73.2.4 retrieving revision 1.73.2.5 diff -u -d -r1.73.2.4 -r1.73.2.5 --- ref6.tex 7 Sep 2005 05:18:06 -0000 1.73.2.4 +++ ref6.tex 3 Oct 2005 16:40:36 -0000 1.73.2.5 @@ -524,8 +524,9 @@ If no expressions are present, \keyword{raise} re-raises the last exception that was active in the current scope. If no exception is -active in the current scope, a \exception{Queue.Empty} exception is -raised indicating this error. +active in the current scope, a \exception{TypeError} exception is +raised indicating that this is an error (if running under IDLE, a +\exception{Queue.Empty} exception is raised instead}. \index{exception} \indexii{raising}{exception} From kbk at users.sourceforge.net Mon Oct 3 21:26:07 2005 From: kbk at users.sourceforge.net (kbk@users.sourceforge.net) Date: Mon, 3 Oct 2005 21:26:07 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/idlelib CodeContext.py, 1.5, 1.6 Message-ID: <20051003192607.7D5451E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/idlelib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29701 Modified Files: CodeContext.py Log Message: Tweak CodeContext.py docstrings, comments, and names. 
Index: CodeContext.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/CodeContext.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- CodeContext.py 2 Oct 2005 23:36:46 -0000 1.5 +++ CodeContext.py 3 Oct 2005 19:26:03 -0000 1.6 @@ -1,13 +1,12 @@ -"""CodeContext - Display the block context of code at top of edit window +"""CodeContext - Extension to display the block context above the edit window -Once code has scrolled off the top of the screen, it can be difficult -to determine which block you are in. This extension implements a pane -at the top of each IDLE edit window which provides block structure -hints. These hints are the lines which contain the block opening -keywords, e.g. 'if', for the enclosing block. The number of hint lines -is determined by the numlines variable in the CodeContext section of -config-extensions.def. Lines which do not open blocks are not shown in -the context hints pane. +Once code has scrolled off the top of a window, it can be difficult to +determine which block you are in. This extension implements a pane at the top +of each IDLE edit window which provides block structure hints. These hints are +the lines which contain the block opening keywords, e.g. 'if', for the +enclosing block. The number of hint lines is determined by the numlines +variable in the CodeContext section of config-extensions.def. Lines which do +not open blocks are not shown in the context hints pane. 
""" import Tkinter @@ -21,13 +20,14 @@ UPDATEINTERVAL = 100 # millisec FONTUPDATEINTERVAL = 1000 # millisec -getspacesfirstword = lambda s, c=re.compile(r"^(\s*)(\w*)"): c.match(s).groups() +getspacesfirstword =\ + lambda s, c=re.compile(r"^(\s*)(\w*)"): c.match(s).groups() class CodeContext: menudefs = [('options', [('!Code Conte_xt', '<<toggle-code-context>>')])] - numlines = idleConf.GetOption("extensions", "CodeContext", - "numlines", type="int", default=3) + context_depth = idleConf.GetOption("extensions", "CodeContext", + "numlines", type="int", default=3) bgcolor = idleConf.GetOption("extensions", "CodeContext", "bgcolor", type="str", default="LightGray") fgcolor = idleConf.GetOption("extensions", "CodeContext", @@ -37,13 +37,13 @@ self.text = editwin.text self.textfont = self.text["font"] self.label = None - # self.info holds information about the context lines of line number - # self.lastfirstline. The information is a tuple of the line's - # indentation, the line's text and the keyword at the beginning of the - # line, as returned by get_line_info. At the beginning of the list - # there's a dummy line, which starts the "block" of the whole document. + # self.info is a list of (line number, indent level, line text, block + # keyword) tuples providing the block structure associated with + # self.topvisible (the linenumber of the line displayed at the top of + # the edit window). self.info[0] is initialized as a 'dummy' line which + # starts the toplevel 'block' of the module. 
self.info = [(0, -1, "", False)] - self.lastfirstline = 1 + self.topvisible = 1 visible = idleConf.GetOption("extensions", "CodeContext", "visible", type="bool", default=False) if visible: @@ -56,7 +56,7 @@ def toggle_code_context_event(self, event=None): if not self.label: self.label = Tkinter.Label(self.editwin.top, - text="\n" * (self.numlines - 1), + text="\n" * (self.context_depth - 1), anchor="w", justify="left", font=self.textfont, bg=self.bgcolor, fg=self.fgcolor, @@ -77,6 +77,7 @@ If the line does not start a block, the keyword value is False. The indentation of empty lines (or comment lines) is INFINITY. + """ text = self.text.get("%d.0" % linenum, "%d.end" % linenum) spaces, firstword = getspacesfirstword(text) @@ -87,64 +88,69 @@ indent = len(spaces) return indent, text, opener - def interesting_lines(self, firstline, stopline=1, stopindent=0): - """ - Find the context lines, starting at firstline. - Will not return lines whose index is smaller than stopline or whose - indentation is smaller than stopindent. - stopline should always be >= 1, so the dummy block start will never - be returned (This function doesn't know what to do about it.) - Returns a list with the context lines, starting from the first (top), - and a number which all context lines above the inspected region should - have a smaller indentation than it. + def get_context(self, new_topvisible, stopline=1, stopindent=0): + """Get context lines, starting at new_topvisible and working backwards. + + Stop when stopline or stopindent is reached. Return a tuple of context + data and the indent level at the top of the region inspected. + """ + assert stopline > 0 lines = [] # The indentation level we are currently in: lastindent = INFINITY # For a line to be interesting, it must begin with a block opening # keyword, and have less indentation than lastindent. 
- for line_index in xrange(firstline, stopline-1, -1): - indent, text, opener = self.get_line_info(line_index) + for linenum in xrange(new_topvisible, stopline-1, -1): + indent, text, opener = self.get_line_info(linenum) if indent < lastindent: lastindent = indent if opener in ("else", "elif"): # We also show the if statement lastindent += 1 - if opener and line_index < firstline and indent >= stopindent: - lines.append((line_index, indent, text, opener)) + if opener and linenum < new_topvisible and indent >= stopindent: + lines.append((linenum, indent, text, opener)) if lastindent <= stopindent: break lines.reverse() return lines, lastindent - def update_label(self): - """Update the CodeContext label, if needed. + def update_code_context(self): + """Update context information and lines visible in the context pane. + """ - firstline = int(self.text.index("@0,0").split('.')[0]) - if self.lastfirstline == firstline: + new_topvisible = int(self.text.index("@0,0").split('.')[0]) + if self.topvisible == new_topvisible: # haven't scrolled return - if self.lastfirstline < firstline: - lines, lastindent = self.interesting_lines(firstline, - self.lastfirstline) + if self.topvisible < new_topvisible: # scroll down + lines, lastindent = self.get_context(new_topvisible, + self.topvisible) + # retain only context info applicable to the region + # between topvisible and new_topvisible: while self.info[-1][1] >= lastindent: del self.info[-1] - self.info.extend(lines) - else: + elif self.topvisible > new_topvisible: # scroll up stopindent = self.info[-1][1] + 1 - while self.info[-1][0] >= firstline: + # retain only context info associated + # with lines above new_topvisible: + while self.info[-1][0] >= new_topvisible: stopindent = self.info[-1][1] del self.info[-1] - lines, lastindent = self.interesting_lines( - firstline, self.info[-1][0]+1, stopindent) - self.info.extend(lines) - self.lastfirstline = firstline - lines = [""] * max(0, self.numlines - len(self.info)) + \ - [x[2] 
for x in self.info[-self.numlines:]] - self.label["text"] = '\n'.join(lines) + lines, lastindent = self.get_context(new_topvisible, + self.info[-1][0]+1, + stopindent) + self.info.extend(lines) + self.topvisible = new_topvisible + + # empty lines in context pane: + context_strings = [""] * max(0, self.context_depth - len(self.info)) + # followed by the context hint lines: + context_strings += [x[2] for x in self.info[-self.context_depth:]] + self.label["text"] = '\n'.join(context_strings) def timer_event(self): if self.label: - self.update_label() + self.update_code_context() self.text.after(UPDATEINTERVAL, self.timer_event) def font_timer_event(self): From kbk at users.sourceforge.net Mon Oct 3 22:08:28 2005 From: kbk at users.sourceforge.net (kbk@users.sourceforge.net) Date: Mon, 3 Oct 2005 22:08:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/idlelib CodeContext.py, 1.6, 1.7 NEWS.txt, 1.63, 1.64 Message-ID: <20051003200828.C52491E402C@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/idlelib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7668 Modified Files: CodeContext.py NEWS.txt Log Message: Incorporate Tal Einat's comment on Patch 936169: Fixes alignment problem. 
Index: CodeContext.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/CodeContext.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- CodeContext.py 3 Oct 2005 19:26:03 -0000 1.6 +++ CodeContext.py 3 Oct 2005 20:08:25 -0000 1.7 @@ -55,18 +55,25 @@ def toggle_code_context_event(self, event=None): if not self.label: - self.label = Tkinter.Label(self.editwin.top, + self.pad_frame = Tkinter.Frame(self.editwin.top, + bg=self.bgcolor, border=2, + relief="sunken") + self.label = Tkinter.Label(self.pad_frame, text="\n" * (self.context_depth - 1), anchor="w", justify="left", font=self.textfont, bg=self.bgcolor, fg=self.fgcolor, - relief="sunken", + border=0, width=1, # Don't request more than we get ) - self.label.pack(side="top", fill="x", expand=0, - after=self.editwin.status_bar) + self.label.pack(side="top", fill="x", expand=True, + padx=4, pady=0) + self.pad_frame.pack(side="top", fill="x", expand=False, + padx=0, pady=0, + after=self.editwin.status_bar) else: self.label.destroy() + self.pad_frame.destroy() self.label = None idleConf.SetOption("extensions", "CodeContext", "visible", str(self.label is not None)) Index: NEWS.txt =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/NEWS.txt,v retrieving revision 1.63 retrieving revision 1.64 diff -u -d -r1.63 -r1.64 --- NEWS.txt 2 Oct 2005 23:36:46 -0000 1.63 +++ NEWS.txt 3 Oct 2005 20:08:25 -0000 1.64 @@ -3,6 +3,8 @@ *Release date: XX-XXX-2005* +- Fixed CodeContext alignment problem, following suggestion from Tal Einat. 
+ - Increased performance in CodeContext extension Patch 936169 Noam Raphael - Mac line endings were incorrect when pasting code from some browsers From nnorwitz at users.sourceforge.net Tue Oct 4 05:17:53 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 4 Oct 2005 05:17:53 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib webbrowser.py,1.38,1.39 Message-ID: <20051004031753.46F671E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2904/Lib Modified Files: webbrowser.py Log Message: Fix pychecker warnings Index: webbrowser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/webbrowser.py,v retrieving revision 1.38 retrieving revision 1.39 diff -u -d -r1.38 -r1.39 --- webbrowser.py 3 Oct 2005 14:16:44 -0000 1.38 +++ webbrowser.py 4 Oct 2005 03:17:49 -0000 1.39 @@ -134,6 +134,9 @@ def __init__(self, name=""): self.name = name + def open(self, url, new=0, autoraise=1): + raise NotImplementedError + def open_new(self, url): return self.open(url, 1) @@ -171,7 +174,7 @@ raise_opt = self.raise_opts and self.raise_opts[autoraise] or '' cmd = "%s %s %s '%s' >/dev/null 2>&1" % (self.name, raise_opt, self.remote_cmd, action) - if remote_background: + if self.remote_background: cmd += ' &' rc = os.system(cmd) if rc: @@ -333,9 +336,7 @@ # These are the right tests because all these Unix browsers require either # a console terminal or an X display to run. 
-# Prefer X browsers if present -if os.environ.get("DISPLAY"): - +def register_X_browsers(): # First, the Mozilla/Netscape browsers for browser in ("mozilla-firefox", "firefox", "mozilla-firebird", "firebird", @@ -381,6 +382,10 @@ if _iscommand("grail"): register("grail", Grail, None) +# Prefer X browsers if present +if os.environ.get("DISPLAY"): + register_X_browsers() + # Also try console browsers if os.environ.get("TERM"): # The Links/elinks browsers From nnorwitz at users.sourceforge.net Tue Oct 4 05:31:05 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 4 Oct 2005 05:31:05 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libwebbrowser.tex, 1.11, 1.12 Message-ID: <20051004033105.295A31E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4770/lib Modified Files: libwebbrowser.tex Log Message: open_new_win does not exist. use UNIX consistently, add autoraise parameter to open(). add versionadded tags to open_new_tab Index: libwebbrowser.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libwebbrowser.tex,v retrieving revision 1.11 retrieving revision 1.12 diff -u -d -r1.11 -r1.12 --- libwebbrowser.tex 3 Oct 2005 14:16:44 -0000 1.11 +++ libwebbrowser.tex 4 Oct 2005 03:31:01 -0000 1.12 @@ -11,7 +11,7 @@ circumstances, simply calling the \function{open()} function from this module will do the right thing. -Under \UNIX, graphical browsers are preferred under X11, but text-mode +Under \UNIX{}, graphical browsers are preferred under X11, but text-mode browsers will be used if graphical browsers are not available or an X11 display isn't available. If text-mode browsers are used, the calling process will block until the user exits the browser. @@ -26,9 +26,9 @@ launch. 
For non-\UNIX{} platforms, or when a remote browser is available on -\UNIX, the controlling process will not wait for the user to finish +\UNIX{}, the controlling process will not wait for the user to finish with the browser, but allow the remote browser to maintain its own -windows on the display. If remote browsers are not available on \UNIX, +windows on the display. If remote browsers are not available on \UNIX{}, the controlling process will launch a new browser and wait. The script \program{webbrowser} can be used as a command-line interface @@ -45,7 +45,7 @@ The following functions are defined: -\begin{funcdesc}{open}{url\optional{, new=0}\optional{, autoraise=1}} +\begin{funcdesc}{open}{url\optional{, new=0\optional{, autoraise=1}}} Display \var{url} using the default browser. If \var{new} is 0, the \var{url} is opened in the same browser window. If \var{new} is 1, a new browser window is opened if possible. If \var{new} is 2, @@ -55,15 +55,15 @@ \end{funcdesc} -\begin{funcdesc}{open_new_win}{url} +\begin{funcdesc}{open_new}{url} Open \var{url} in a new window of the default browser, if possible, - otherwise, open \var{url} in the only browser window. Alias - \function{open_new}. + otherwise, open \var{url} in the only browser window. \end{funcdesc} \begin{funcdesc}{open_new_tab}{url} Open \var{url} in a new page ("tab") of the default browser, if possible, - otherwise equivalent to \function{open_new_win}. + otherwise equivalent to \function{open_new}. +\versionadded{2.5} \end{funcdesc} \begin{funcdesc}{get}{\optional{name}} @@ -117,7 +117,7 @@ \begin{description} \item[(1)] ``Konqueror'' is the file manager for the KDE desktop environment for -UNIX, and only makes sense to use if KDE is running. Some way of +\UNIX{}, and only makes sense to use if KDE is running. Some way of reliably detecting KDE would be nice; the \envvar{KDEDIR} variable is not sufficient. 
Note also that the name ``kfm'' is used even when using the \program{konqueror} command with KDE 2 --- the @@ -141,13 +141,13 @@ Browser controllers provide two methods which parallel two of the module-level convenience functions: -\begin{funcdesc}{open}{url\optional{, new}} +\begin{funcdesc}{open}{url\optional{, new\optional{, autoraise=1}}} Display \var{url} using the browser handled by this controller. If \var{new} is 1, a new browser window is opened if possible. If \var{new} is 2, a new browser page ("tab") is opened if possible. \end{funcdesc} -\begin{funcdesc}{open_new_win}{url} +\begin{funcdesc}{open_new}{url} Open \var{url} in a new window of the browser handled by this controller, if possible, otherwise, open \var{url} in the only browser window. Alias \function{open_new}. @@ -155,5 +155,6 @@ \begin{funcdesc}{open_new_tab}{url} Open \var{url} in a new page ("tab") of the browser handled by this - controller, if possible, otherwise equivalent to \function{open_new_win}. + controller, if possible, otherwise equivalent to \function{open_new}. +\versionadded{2.5} \end{funcdesc} From nnorwitz at users.sourceforge.net Tue Oct 4 05:37:32 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 4 Oct 2005 05:37:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ref ref6.tex,1.78,1.79 Message-ID: <20051004033732.BDD631E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ref In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5523/Doc/ref Modified Files: ref6.tex Log Message: Armin caught this mistake in bug #973103. 
Index: ref6.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref6.tex,v retrieving revision 1.78 retrieving revision 1.79 diff -u -d -r1.78 -r1.79 --- ref6.tex 3 Oct 2005 16:39:51 -0000 1.78 +++ ref6.tex 4 Oct 2005 03:37:29 -0000 1.79 @@ -603,7 +603,7 @@ \keyword{continue} may only occur syntactically nested in a \keyword{for} or \keyword{while} loop, but not nested in a function or class definition or -\keyword{try} statement within that loop.\footnote{It may +\keyword{finally} statement within that loop.\footnote{It may occur within an \keyword{except} or \keyword{else} clause. The restriction on occurring in the \keyword{try} clause is implementor's laziness and will eventually be lifted.} From nnorwitz at users.sourceforge.net Tue Oct 4 05:38:04 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 4 Oct 2005 05:38:04 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ref ref6.tex, 1.73.2.5, 1.73.2.6 Message-ID: <20051004033804.95FC71E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ref In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5622/Doc/ref Modified Files: Tag: release24-maint ref6.tex Log Message: Armin caught this mistake in bug #973103. Index: ref6.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref6.tex,v retrieving revision 1.73.2.5 retrieving revision 1.73.2.6 diff -u -d -r1.73.2.5 -r1.73.2.6 --- ref6.tex 3 Oct 2005 16:40:36 -0000 1.73.2.5 +++ ref6.tex 4 Oct 2005 03:38:01 -0000 1.73.2.6 @@ -603,7 +603,7 @@ \keyword{continue} may only occur syntactically nested in a \keyword{for} or \keyword{while} loop, but not nested in a function or class definition or -\keyword{try} statement within that loop.\footnote{It may +\keyword{finally} statement within that loop.\footnote{It may occur within an \keyword{except} or \keyword{else} clause. 
The restriction on occurring in the \keyword{try} clause is implementor's laziness and will eventually be lifted.} From nnorwitz at users.sourceforge.net Tue Oct 4 05:43:36 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 4 Oct 2005 05:43:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ref ref6.tex, 1.73.2.6, 1.73.2.7 Message-ID: <20051004034336.C070E1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ref In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6243/Doc/ref Modified Files: Tag: release24-maint ref6.tex Log Message: Make close brace a paren to match open. Index: ref6.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref6.tex,v retrieving revision 1.73.2.6 retrieving revision 1.73.2.7 diff -u -d -r1.73.2.6 -r1.73.2.7 --- ref6.tex 4 Oct 2005 03:38:01 -0000 1.73.2.6 +++ ref6.tex 4 Oct 2005 03:43:33 -0000 1.73.2.7 @@ -526,7 +526,7 @@ exception that was active in the current scope. If no exception is active in the current scope, a \exception{TypeError} exception is raised indicating that this is an error (if running under IDLE, a -\exception{Queue.Empty} exception is raised instead}. +\exception{Queue.Empty} exception is raised instead). \index{exception} \indexii{raising}{exception} From nnorwitz at users.sourceforge.net Tue Oct 4 05:43:46 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 4 Oct 2005 05:43:46 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ref ref6.tex,1.79,1.80 Message-ID: <20051004034346.59BBD1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ref In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6267/Doc/ref Modified Files: ref6.tex Log Message: Make close brace a paren to match open. 
Index: ref6.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref6.tex,v retrieving revision 1.79 retrieving revision 1.80 diff -u -d -r1.79 -r1.80 --- ref6.tex 4 Oct 2005 03:37:29 -0000 1.79 +++ ref6.tex 4 Oct 2005 03:43:43 -0000 1.80 @@ -526,7 +526,7 @@ exception that was active in the current scope. If no exception is active in the current scope, a \exception{TypeError} exception is raised indicating that this is an error (if running under IDLE, a -\exception{Queue.Empty} exception is raised instead}. +\exception{Queue.Empty} exception is raised instead). \index{exception} \indexii{raising}{exception} From nnorwitz at users.sourceforge.net Tue Oct 4 06:32:45 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 4 Oct 2005 06:32:45 +0200 (CEST) Subject: [Python-checkins] python/dist/src Makefile.pre.in,1.152,1.153 Message-ID: <20051004043245.CD95C1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13504 Modified Files: Makefile.pre.in Log Message: Get "make install" working again after adding Lib/test/bad_coding.py which can't be compiled. Thanks to Mat Martineau for spotting the problem. 
Index: Makefile.pre.in =================================================================== RCS file: /cvsroot/python/python/dist/src/Makefile.pre.in,v retrieving revision 1.152 retrieving revision 1.153 diff -u -d -r1.152 -r1.153 --- Makefile.pre.in 24 Apr 2005 22:26:37 -0000 1.152 +++ Makefile.pre.in 4 Oct 2005 04:32:42 -0000 1.153 @@ -724,11 +724,11 @@ PYTHONPATH=$(DESTDIR)$(LIBDEST) $(RUNSHARED) \ ./$(BUILDPYTHON) -Wi -tt $(DESTDIR)$(LIBDEST)/compileall.py \ -d $(LIBDEST) -f \ - -x 'badsyntax|site-packages' $(DESTDIR)$(LIBDEST) + -x 'bad_coding|badsyntax|site-packages' $(DESTDIR)$(LIBDEST) PYTHONPATH=$(DESTDIR)$(LIBDEST) $(RUNSHARED) \ ./$(BUILDPYTHON) -Wi -tt -O $(DESTDIR)$(LIBDEST)/compileall.py \ -d $(LIBDEST) -f \ - -x 'badsyntax|site-packages' $(DESTDIR)$(LIBDEST) + -x 'bad_coding|badsyntax|site-packages' $(DESTDIR)$(LIBDEST) -PYTHONPATH=$(DESTDIR)$(LIBDEST) $(RUNSHARED) \ ./$(BUILDPYTHON) -Wi -t $(DESTDIR)$(LIBDEST)/compileall.py \ -d $(LIBDEST)/site-packages -f \ From rhettinger at users.sourceforge.net Wed Oct 5 13:39:15 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Wed, 5 Oct 2005 13:39:15 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_bisect.py, 1.13, 1.14 Message-ID: <20051005113915.EC71F1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3838/Lib/test Modified Files: test_bisect.py Log Message: SF #1313496: bisect C replacement doesn't accept named args Index: test_bisect.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_bisect.py,v retrieving revision 1.13 retrieving revision 1.14 diff -u -d -r1.13 -r1.14 --- test_bisect.py 27 Sep 2004 23:11:35 -0000 1.13 +++ test_bisect.py 5 Oct 2005 11:39:12 -0000 1.14 @@ -130,6 +130,16 @@ def test_backcompatibility(self): self.assertEqual(bisect, bisect_right) + def test_keyword_args(self): + data = 
[10, 20, 30, 40, 50] + self.assertEqual(bisect_left(a=data, x=25, lo=1, hi=3), 2) + self.assertEqual(bisect_right(a=data, x=25, lo=1, hi=3), 2) + self.assertEqual(bisect(a=data, x=25, lo=1, hi=3), 2) + insort_left(a=data, x=25, lo=1, hi=3) + insort_right(a=data, x=25, lo=1, hi=3) + insort(a=data, x=25, lo=1, hi=3) + self.assertEqual(data, [10, 20, 25, 25, 25, 30, 40, 50]) + #============================================================================== class TestInsort(unittest.TestCase): From rhettinger at users.sourceforge.net Wed Oct 5 13:39:16 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Wed, 5 Oct 2005 13:39:16 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules _bisectmodule.c,1.2,1.3 Message-ID: <20051005113916.3DBC91E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3838/Modules Modified Files: _bisectmodule.c Log Message: SF #1313496: bisect C replacement doesn't accept named args Index: _bisectmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_bisectmodule.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- _bisectmodule.c 2 Aug 2004 13:24:54 -0000 1.2 +++ _bisectmodule.c 5 Oct 2005 11:39:12 -0000 1.3 @@ -34,15 +34,16 @@ } static PyObject * -bisect_right(PyObject *self, PyObject *args) +bisect_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:bisect_right", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:bisect_right", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_right(list, item, lo, hi); if (index < 0) @@ -51,7 +52,7 @@ } PyDoc_STRVAR(bisect_right_doc, -"bisect_right(list, item[, lo[, hi]]) -> index\n\ 
+"bisect_right(a, x[, lo[, hi]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -63,15 +64,16 @@ slice of a to be searched.\n"); static PyObject * -insort_right(PyObject *self, PyObject *args) +insort_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:insort_right", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:insort_right", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_right(list, item, lo, hi); if (index < 0) @@ -91,7 +93,7 @@ } PyDoc_STRVAR(insort_right_doc, -"insort_right(list, item[, lo[, hi]])\n\ +"insort_right(a, x[, lo[, hi]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ @@ -129,15 +131,16 @@ } static PyObject * -bisect_left(PyObject *self, PyObject *args) +bisect_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:bisect_left", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:bisect_left", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_left(list, item, lo, hi); if (index < 0) @@ -146,7 +149,7 @@ } PyDoc_STRVAR(bisect_left_doc, -"bisect_left(list, item[, lo[, hi]]) -> index\n\ +"bisect_left(a, x[, lo[, hi]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -158,15 +161,16 @@ slice of a to be searched.\n"); static PyObject * -insort_left(PyObject *self, PyObject *args) +insort_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, 
"OO|ii:insort_left", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:insort_left", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_left(list, item, lo, hi); if (index < 0) @@ -186,7 +190,7 @@ } PyDoc_STRVAR(insort_left_doc, -"insort_left(list, item[, lo[, hi]])\n\ +"insort_left(a, x[, lo[, hi]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ @@ -200,17 +204,17 @@ static PyMethodDef bisect_methods[] = { {"bisect_right", (PyCFunction)bisect_right, - METH_VARARGS, bisect_right_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_right_doc}, {"bisect", (PyCFunction)bisect_right, - METH_VARARGS, bisect_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_doc}, {"insort_right", (PyCFunction)insort_right, - METH_VARARGS, insort_right_doc}, + METH_VARARGS|METH_KEYWORDS, insort_right_doc}, {"insort", (PyCFunction)insort_right, - METH_VARARGS, insort_doc}, + METH_VARARGS|METH_KEYWORDS, insort_doc}, {"bisect_left", (PyCFunction)bisect_left, - METH_VARARGS, bisect_left_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_left_doc}, {"insort_left", (PyCFunction)insort_left, - METH_VARARGS, insort_left_doc}, + METH_VARARGS|METH_KEYWORDS, insort_left_doc}, {NULL, NULL} /* sentinel */ }; From rhettinger at users.sourceforge.net Wed Oct 5 13:48:39 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Wed, 5 Oct 2005 13:48:39 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_bisect.py, 1.13, 1.13.2.1 Message-ID: <20051005114839.8A9BE1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5788/Lib/test Modified Files: Tag: release24-maint test_bisect.py Log Message: SF #1313496: bisect C replacement doesn't accept named args Index: test_bisect.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_bisect.py,v retrieving revision 
1.13 retrieving revision 1.13.2.1 diff -u -d -r1.13 -r1.13.2.1 --- test_bisect.py 27 Sep 2004 23:11:35 -0000 1.13 +++ test_bisect.py 5 Oct 2005 11:48:36 -0000 1.13.2.1 @@ -130,6 +130,16 @@ def test_backcompatibility(self): self.assertEqual(bisect, bisect_right) + def test_keyword_args(self): + data = [10, 20, 30, 40, 50] + self.assertEqual(bisect_left(a=data, x=25, lo=1, hi=3), 2) + self.assertEqual(bisect_right(a=data, x=25, lo=1, hi=3), 2) + self.assertEqual(bisect(a=data, x=25, lo=1, hi=3), 2) + insort_left(a=data, x=25, lo=1, hi=3) + insort_right(a=data, x=25, lo=1, hi=3) + insort(a=data, x=25, lo=1, hi=3) + self.assertEqual(data, [10, 20, 25, 25, 25, 30, 40, 50]) + #============================================================================== class TestInsort(unittest.TestCase): From rhettinger at users.sourceforge.net Wed Oct 5 13:48:39 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Wed, 5 Oct 2005 13:48:39 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules _bisectmodule.c, 1.2, 1.2.4.1 Message-ID: <20051005114839.B6A091E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5788/Modules Modified Files: Tag: release24-maint _bisectmodule.c Log Message: SF #1313496: bisect C replacement doesn't accept named args Index: _bisectmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_bisectmodule.c,v retrieving revision 1.2 retrieving revision 1.2.4.1 diff -u -d -r1.2 -r1.2.4.1 --- _bisectmodule.c 2 Aug 2004 13:24:54 -0000 1.2 +++ _bisectmodule.c 5 Oct 2005 11:48:36 -0000 1.2.4.1 @@ -34,15 +34,16 @@ } static PyObject * -bisect_right(PyObject *self, PyObject *args) +bisect_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, 
"OO|ii:bisect_right", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:bisect_right", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_right(list, item, lo, hi); if (index < 0) @@ -51,7 +52,7 @@ } PyDoc_STRVAR(bisect_right_doc, -"bisect_right(list, item[, lo[, hi]]) -> index\n\ +"bisect_right(a, x[, lo[, hi]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -63,15 +64,16 @@ slice of a to be searched.\n"); static PyObject * -insort_right(PyObject *self, PyObject *args) +insort_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:insort_right", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:insort_right", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_right(list, item, lo, hi); if (index < 0) @@ -91,7 +93,7 @@ } PyDoc_STRVAR(insort_right_doc, -"insort_right(list, item[, lo[, hi]])\n\ +"insort_right(a, x[, lo[, hi]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ @@ -129,15 +131,16 @@ } static PyObject * -bisect_left(PyObject *self, PyObject *args) +bisect_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:bisect_left", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:bisect_left", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_left(list, item, lo, hi); if (index < 0) @@ -146,7 +149,7 @@ } PyDoc_STRVAR(bisect_left_doc, -"bisect_left(list, item[, lo[, hi]]) -> index\n\ +"bisect_left(a, x[, lo[, hi]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -158,15 
+161,16 @@ slice of a to be searched.\n"); static PyObject * -insort_left(PyObject *self, PyObject *args) +insort_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:insort_left", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:insort_left", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_left(list, item, lo, hi); if (index < 0) @@ -186,7 +190,7 @@ } PyDoc_STRVAR(insort_left_doc, -"insort_left(list, item[, lo[, hi]])\n\ +"insort_left(a, x[, lo[, hi]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ @@ -200,17 +204,17 @@ static PyMethodDef bisect_methods[] = { {"bisect_right", (PyCFunction)bisect_right, - METH_VARARGS, bisect_right_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_right_doc}, {"bisect", (PyCFunction)bisect_right, - METH_VARARGS, bisect_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_doc}, {"insort_right", (PyCFunction)insort_right, - METH_VARARGS, insort_right_doc}, + METH_VARARGS|METH_KEYWORDS, insort_right_doc}, {"insort", (PyCFunction)insort_right, - METH_VARARGS, insort_doc}, + METH_VARARGS|METH_KEYWORDS, insort_doc}, {"bisect_left", (PyCFunction)bisect_left, - METH_VARARGS, bisect_left_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_left_doc}, {"insort_left", (PyCFunction)insort_left, - METH_VARARGS, insort_left_doc}, + METH_VARARGS|METH_KEYWORDS, insort_left_doc}, {NULL, NULL} /* sentinel */ }; From rhettinger at users.sourceforge.net Wed Oct 5 13:48:40 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Wed, 5 Oct 2005 13:48:40 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS, 1.1193.2.119, 1.1193.2.120 Message-ID: <20051005114840.B77F21E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory 
sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5788/Misc Modified Files: Tag: release24-maint NEWS Log Message: SF #1313496: bisect C replacement doesn't accept named args Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1193.2.119 retrieving revision 1.1193.2.120 diff -u -d -r1.1193.2.119 -r1.1193.2.120 --- NEWS 3 Oct 2005 04:50:55 -0000 1.1193.2.119 +++ NEWS 5 Oct 2005 11:48:36 -0000 1.1193.2.120 @@ -30,6 +30,8 @@ Library ------- +- SF #1313496: the bisect module now accepts named arguments. + - Bug #729103: pydoc.py: Fix docother() method to accept additional "parent" argument. From perky at users.sourceforge.net Thu Oct 6 17:52:04 2005 From: perky at users.sourceforge.net (perky@users.sourceforge.net) Date: Thu, 6 Oct 2005 17:52:04 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules/cjkcodecs cjkcodecs.h, 1.5, 1.6 Message-ID: <20051006155204.010FF1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules/cjkcodecs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29224/Modules/cjkcodecs Modified Files: cjkcodecs.h Log Message: Change the internal "undefined codepoint" mark for CJKCodecs decoders from U+FFFD to U+FFFE which is considered more appropriate. 
(from MAL's comment) Index: cjkcodecs.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/cjkcodecs/cjkcodecs.h,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cjkcodecs.h 19 Aug 2004 17:49:56 -0000 1.5 +++ cjkcodecs.h 6 Oct 2005 15:51:59 -0000 1.6 @@ -12,7 +12,10 @@ #include "multibytecodec.h" -#define UNIINV Py_UNICODE_REPLACEMENT_CHARACTER +/* a unicode "undefined" codepoint */ +#define UNIINV 0xFFFE + +/* internal-use DBCS codepoints which aren't used by any charsets */ #define NOCHAR 0xFFFF #define MULTIC 0xFFFE #define DBCINV 0xFFFD From doerwalter at users.sourceforge.net Thu Oct 6 22:30:00 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Thu, 6 Oct 2005 22:30:00 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_codecs.py, 1.26, 1.27 Message-ID: <20051006203000.EC2D71E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6122/Lib/test Modified Files: test_codecs.py Log Message: Part of SF patch #1313939: Speedup charmap decoding by extending PyUnicode_DecodeCharmap() the accept a unicode string as the mapping argument which is used as a mapping table. This code isn't used by any of the codecs yet. 
Index: test_codecs.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codecs.py,v retrieving revision 1.26 retrieving revision 1.27 diff -u -d -r1.26 -r1.27 --- test_codecs.py 30 Aug 2005 10:23:13 -0000 1.26 +++ test_codecs.py 6 Oct 2005 20:29:57 -0000 1.27 @@ -924,6 +924,40 @@ (chars, size) = codecs.getdecoder(encoding)(bytes) self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding)) +class CharmapTest(unittest.TestCase): + def test_decode_with_string_map(self): + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"), + (u"abc", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"), + (u"ab\ufffd", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"), + (u"ab\ufffd", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"), + (u"ab", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"), + (u"ab", 3) + ) + + allbytes = "".join(chr(i) for i in xrange(256)) + self.assertEquals( + codecs.charmap_decode(allbytes, "ignore", u""), + (u"", len(allbytes)) + ) + + def test_main(): test_support.run_unittest( UTF16Test, @@ -940,7 +974,8 @@ StreamReaderTest, Str2StrTest, BasicUnicodeTest, - BasicStrTest + BasicStrTest, + CharmapTest ) From doerwalter at users.sourceforge.net Thu Oct 6 22:30:01 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Thu, 6 Oct 2005 22:30:01 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects unicodeobject.c, 2.231, 2.232 Message-ID: <20051006203001.283881E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6122/Objects Modified Files: unicodeobject.c Log Message: Part of SF patch #1313939: Speedup charmap decoding by extending PyUnicode_DecodeCharmap() the accept a unicode 
string as the mapping argument which is used as a mapping table. This code isn't used by any of the codecs yet. Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.231 retrieving revision 2.232 diff -u -d -r2.231 -r2.232 --- unicodeobject.c 30 Aug 2005 10:23:14 -0000 2.231 +++ unicodeobject.c 6 Oct 2005 20:29:57 -0000 2.232 @@ -2833,6 +2833,8 @@ int extrachars = 0; PyObject *errorHandler = NULL; PyObject *exc = NULL; + Py_UNICODE *mapstring = NULL; + int maplen = 0; /* Default to Latin-1 */ if (mapping == NULL) @@ -2845,91 +2847,121 @@ return (PyObject *)v; p = PyUnicode_AS_UNICODE(v); e = s + size; - while (s < e) { - unsigned char ch = *s; - PyObject *w, *x; + if (PyUnicode_CheckExact(mapping)) { + mapstring = PyUnicode_AS_UNICODE(mapping); + maplen = PyUnicode_GET_SIZE(mapping); + while (s < e) { + unsigned char ch = *s; + Py_UNICODE x = 0xfffe; /* illegal value */ - /* Get mapping (char ordinal -> integer, Unicode char or None) */ - w = PyInt_FromLong((long)ch); - if (w == NULL) - goto onError; - x = PyObject_GetItem(mapping, w); - Py_DECREF(w); - if (x == NULL) { - if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found means: mapping is undefined. 
*/ - PyErr_Clear(); - x = Py_None; - Py_INCREF(x); - } else - goto onError; - } + if (ch < maplen) + x = mapstring[ch]; - /* Apply mapping */ - if (PyInt_Check(x)) { - long value = PyInt_AS_LONG(x); - if (value < 0 || value > 65535) { - PyErr_SetString(PyExc_TypeError, - "character mapping must be in range(65536)"); - Py_DECREF(x); - goto onError; + if (x == 0xfffe) { + /* undefined mapping */ + outpos = p-PyUnicode_AS_UNICODE(v); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to ", + starts, size, &startinpos, &endinpos, &exc, &s, + (PyObject **)&v, &outpos, &p)) { + goto onError; + } + continue; } - *p++ = (Py_UNICODE)value; + *p++ = x; + ++s; } - else if (x == Py_None) { - /* undefined mapping */ - outpos = p-PyUnicode_AS_UNICODE(v); - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "charmap", "character maps to ", - starts, size, &startinpos, &endinpos, &exc, &s, - (PyObject **)&v, &outpos, &p)) { - Py_DECREF(x); + } + else { + while (s < e) { + unsigned char ch = *s; + PyObject *w, *x; + + /* Get mapping (char ordinal -> integer, Unicode char or None) */ + w = PyInt_FromLong((long)ch); + if (w == NULL) goto onError; + x = PyObject_GetItem(mapping, w); + Py_DECREF(w); + if (x == NULL) { + if (PyErr_ExceptionMatches(PyExc_LookupError)) { + /* No mapping found means: mapping is undefined. 
*/ + PyErr_Clear(); + x = Py_None; + Py_INCREF(x); + } else + goto onError; } - continue; - } - else if (PyUnicode_Check(x)) { - int targetsize = PyUnicode_GET_SIZE(x); - - if (targetsize == 1) - /* 1-1 mapping */ - *p++ = *PyUnicode_AS_UNICODE(x); - - else if (targetsize > 1) { - /* 1-n mapping */ - if (targetsize > extrachars) { - /* resize first */ - int oldpos = (int)(p - PyUnicode_AS_UNICODE(v)); - int needed = (targetsize - extrachars) + \ - (targetsize << 2); - extrachars += needed; - if (_PyUnicode_Resize(&v, - PyUnicode_GET_SIZE(v) + needed) < 0) { - Py_DECREF(x); - goto onError; + + /* Apply mapping */ + if (PyInt_Check(x)) { + long value = PyInt_AS_LONG(x); + if (value < 0 || value > 65535) { + PyErr_SetString(PyExc_TypeError, + "character mapping must be in range(65536)"); + Py_DECREF(x); + goto onError; + } + *p++ = (Py_UNICODE)value; + } + else if (x == Py_None) { + /* undefined mapping */ + outpos = p-PyUnicode_AS_UNICODE(v); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to ", + starts, size, &startinpos, &endinpos, &exc, &s, + (PyObject **)&v, &outpos, &p)) { + Py_DECREF(x); + goto onError; + } + continue; + } + else if (PyUnicode_Check(x)) { + int targetsize = PyUnicode_GET_SIZE(x); + + if (targetsize == 1) + /* 1-1 mapping */ + *p++ = *PyUnicode_AS_UNICODE(x); + + else if (targetsize > 1) { + /* 1-n mapping */ + if (targetsize > extrachars) { + /* resize first */ + int oldpos = (int)(p - PyUnicode_AS_UNICODE(v)); + int needed = (targetsize - extrachars) + \ + (targetsize << 2); + extrachars += needed; + if (_PyUnicode_Resize(&v, + PyUnicode_GET_SIZE(v) + needed) < 0) { + Py_DECREF(x); + goto onError; + } + p = PyUnicode_AS_UNICODE(v) + oldpos; } - p = PyUnicode_AS_UNICODE(v) + oldpos; + Py_UNICODE_COPY(p, + PyUnicode_AS_UNICODE(x), + targetsize); + p += targetsize; + extrachars -= targetsize; } - Py_UNICODE_COPY(p, - PyUnicode_AS_UNICODE(x), - 
targetsize); - p += targetsize; - extrachars -= targetsize; + /* 1-0 mapping: skip the character */ + } + else { + /* wrong return value */ + PyErr_SetString(PyExc_TypeError, + "character mapping must return integer, None or unicode"); + Py_DECREF(x); + goto onError; } - /* 1-0 mapping: skip the character */ - } - else { - /* wrong return value */ - PyErr_SetString(PyExc_TypeError, - "character mapping must return integer, None or unicode"); Py_DECREF(x); - goto onError; + ++s; } - Py_DECREF(x); - ++s; } if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0) From doerwalter at users.sourceforge.net Thu Oct 6 22:30:01 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Thu, 6 Oct 2005 22:30:01 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/api concrete.tex,1.67,1.68 Message-ID: <20051006203001.700151E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/api In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6122/Doc/api Modified Files: concrete.tex Log Message: Part of SF patch #1313939: Speedup charmap decoding by extending PyUnicode_DecodeCharmap() to accept a unicode string as the mapping argument which is used as a mapping table. This code isn't used by any of the codecs yet. Index: concrete.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/concrete.tex,v retrieving revision 1.67 retrieving revision 1.68 diff -u -d -r1.67 -r1.68 --- concrete.tex 28 Sep 2005 12:53:12 -0000 1.67 +++ concrete.tex 6 Oct 2005 20:29:57 -0000 1.68 @@ -1322,7 +1322,12 @@ const char *errors} Create a Unicode object by decoding \var{size} bytes of the encoded string \var{s} using the given \var{mapping} object. Return - \NULL{} if an exception was raised by the codec. + \NULL{} if an exception was raised by the codec. If \var{mapping} is \NULL{} + latin-1 decoding will be done.
Else it can be a dictionary mapping byte or a + unicode string, which is treated as a lookup table. Byte values greater + than the length of the string and U+FFFE "characters" are treated as + "undefined mapping". + \versionchanged[Allowed unicode string as mapping argument]{2.4} \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeCharmap}{const Py_UNICODE *s, From doerwalter at users.sourceforge.net Thu Oct 6 22:30:01 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Thu, 6 Oct 2005 22:30:01 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1386,1.1387 Message-ID: <20051006203001.D61701E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6122/Misc Modified Files: NEWS Log Message: Part of SF patch #1313939: Speedup charmap decoding by extending PyUnicode_DecodeCharmap() to accept a unicode string as the mapping argument which is used as a mapping table. This code isn't used by any of the codecs yet. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1386 retrieving revision 1.1387 diff -u -d -r1.1386 -r1.1387 --- NEWS 3 Oct 2005 14:16:44 -0000 1.1386 +++ NEWS 6 Oct 2005 20:29:57 -0000 1.1387 @@ -563,6 +563,11 @@ - Removed PyRange_New(). +- Patch #1313939: PyUnicode_DecodeCharmap() accepts a unicode string as the + mapping argument now. This string is used as a mapping table. Byte values + greater than the length of the string and 0xFFFE are treated as undefined + mappings.
+ Tests ----- From nascheme at users.sourceforge.net Fri Oct 7 07:09:32 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Fri, 7 Oct 2005 07:09:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects codeobject.c, 1.1.2.4, 1.1.2.5 Message-ID: <20051007050932.EB9C51E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16235/Objects Modified Files: Tag: ast-branch codeobject.c Log Message: Merge MWH's fix for new.code (r2.315) into ast-branch. Index: codeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/Attic/codeobject.c,v retrieving revision 1.1.2.4 retrieving revision 1.1.2.5 diff -u -d -r1.1.2.4 -r1.1.2.5 --- codeobject.c 11 Jul 2005 04:03:11 -0000 1.1.2.4 +++ codeobject.c 7 Oct 2005 05:09:29 -0000 1.1.2.5 @@ -25,29 +25,27 @@ return 1; } -static int +static void intern_strings(PyObject *tuple) { int i; for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); - if (v == NULL || !PyString_Check(v)) { + if (v == NULL || !PyString_CheckExact(v)) { Py_FatalError("non-string found in code slot"); - PyErr_BadInternalCall(); - return -1; } PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i)); } - return 0; } + PyCodeObject * PyCode_New(int argcount, int nlocals, int stacksize, int flags, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, int firstlineno, - PyObject *lnotab) + PyObject *lnotab) { PyCodeObject *co; int i; @@ -129,6 +127,50 @@ {NULL} /* Sentinel */ }; +/* Helper for code_new: return a shallow copy of a tuple that is + guaranteed to contain exact strings, by converting string subclasses + to exact strings and complaining if a non-string is found. 
*/ +static PyObject* +validate_and_copy_tuple(PyObject *tup) +{ + PyObject *newtuple; + PyObject *item; + int i, len; + + len = PyTuple_GET_SIZE(tup); + newtuple = PyTuple_New(len); + if (newtuple == NULL) + return NULL; + + for (i = 0; i < len; i++) { + item = PyTuple_GET_ITEM(tup, i); + if (PyString_CheckExact(item)) { + Py_INCREF(item); + } + else if (!PyString_Check(item)) { + PyErr_Format( + PyExc_TypeError, + "name tuples must contain only " + "strings, not '%.500s'", + item->ob_type->tp_name); + Py_DECREF(newtuple); + return NULL; + } + else { + item = PyString_FromStringAndSize( + PyString_AS_STRING(item), + PyString_GET_SIZE(item)); + if (item == NULL) { + Py_DECREF(newtuple); + return NULL; + } + } + PyTuple_SET_ITEM(newtuple, i, item); + } + + return newtuple; +} + PyDoc_STRVAR(code_doc, "code(argcount, nlocals, stacksize, flags, codestring, constants, names,\n\ varnames, filename, name, firstlineno, lnotab[, freevars[, cellvars]])\n\ @@ -142,12 +184,13 @@ int nlocals; int stacksize; int flags; + PyObject *co = NULL; PyObject *code; PyObject *consts; - PyObject *names; - PyObject *varnames; - PyObject *freevars = NULL; - PyObject *cellvars = NULL; + PyObject *names, *ournames = NULL; + PyObject *varnames, *ourvarnames = NULL; + PyObject *freevars = NULL, *ourfreevars = NULL; + PyObject *cellvars = NULL, *ourcellvars = NULL; PyObject *filename; PyObject *name; int firstlineno; @@ -165,31 +208,49 @@ &PyTuple_Type, &cellvars)) return NULL; - if (freevars == NULL || cellvars == NULL) { - PyObject *empty = PyTuple_New(0); - if (empty == NULL) - return NULL; - if (freevars == NULL) { - freevars = empty; - Py_INCREF(freevars); - } - if (cellvars == NULL) { - cellvars = empty; - Py_INCREF(cellvars); - } - Py_DECREF(empty); + if (argcount < 0) { + PyErr_SetString( + PyExc_ValueError, + "code: argcount must not be negative"); + goto cleanup; } - if (!PyObject_CheckReadBuffer(code)) { - PyErr_SetString(PyExc_TypeError, - "bytecode object must be a single-segment 
read-only buffer"); - return NULL; + if (nlocals < 0) { + PyErr_SetString( + PyExc_ValueError, + "code: nlocals must not be negative"); + goto cleanup; } - return (PyObject *)PyCode_New(argcount, nlocals, stacksize, flags, - code, consts, names, varnames, - freevars, cellvars, filename, name, - firstlineno, lnotab); + ournames = validate_and_copy_tuple(names); + if (ournames == NULL) + goto cleanup; + ourvarnames = validate_and_copy_tuple(varnames); + if (ourvarnames == NULL) + goto cleanup; + if (freevars) + ourfreevars = validate_and_copy_tuple(freevars); + else + ourfreevars = PyTuple_New(0); + if (ourfreevars == NULL) + goto cleanup; + if (cellvars) + ourcellvars = validate_and_copy_tuple(cellvars); + else + ourcellvars = PyTuple_New(0); + if (ourcellvars == NULL) + goto cleanup; + + co = (PyObject *)PyCode_New(argcount, nlocals, stacksize, flags, + code, consts, ournames, ourvarnames, + ourfreevars, ourcellvars, filename, + name, firstlineno, lnotab); + cleanup: + Py_XDECREF(ournames); + Py_XDECREF(ourvarnames); + Py_XDECREF(ourfreevars); + Py_XDECREF(ourcellvars); + return co; } static void From vsajip at users.sourceforge.net Fri Oct 7 10:35:42 2005 From: vsajip at users.sourceforge.net (vsajip@users.sourceforge.net) Date: Fri, 7 Oct 2005 10:35:42 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging __init__.py, 1.30, 1.31 Message-ID: <20051007083542.9C0BE1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17951 Modified Files: __init__.py Log Message: Fixed bug where the logging message was wrongly being demoted from Unicode to string (SF #1314107) Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/__init__.py,v retrieving revision 1.30 retrieving revision 1.31 diff -u -d -r1.30 -r1.31 --- __init__.py 16 Sep 2005 10:33:40 -0000 1.30 +++ __init__.py 7 Oct 2005 08:35:36 -0000 
1.31 @@ -41,8 +41,8 @@ __author__ = "Vinay Sajip " __status__ = "beta" -__version__ = "0.4.9.6" -__date__ = "27 March 2005" +__version__ = "0.4.9.7" +__date__ = "07 October 2005" #--------------------------------------------------------------------------- # Miscellaneous module data @@ -266,10 +266,12 @@ if not hasattr(types, "UnicodeType"): #if no unicode support... msg = str(self.msg) else: - try: - msg = str(self.msg) - except UnicodeError: - msg = self.msg #Defer encoding till later + msg = self.msg + if type(msg) not in (types.UnicodeType, types.StringType): + try: + msg = str(self.msg) + except UnicodeError: + msg = self.msg #Defer encoding till later if self.args: msg = msg % self.args return msg From vsajip at users.sourceforge.net Fri Oct 7 10:36:36 2005 From: vsajip at users.sourceforge.net (vsajip@users.sourceforge.net) Date: Fri, 7 Oct 2005 10:36:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging __init__.py, 1.24.2.2, 1.24.2.3 Message-ID: <20051007083636.20D071E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18262 Modified Files: Tag: release24-maint __init__.py Log Message: Fixed bug where the logging message was wrongly being demoted from Unicode to string (SF #1314107) Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/__init__.py,v retrieving revision 1.24.2.2 retrieving revision 1.24.2.3 diff -u -d -r1.24.2.2 -r1.24.2.3 --- __init__.py 16 Sep 2005 10:44:40 -0000 1.24.2.2 +++ __init__.py 7 Oct 2005 08:36:33 -0000 1.24.2.3 @@ -41,8 +41,8 @@ __author__ = "Vinay Sajip " __status__ = "beta" -__version__ = "0.4.9.6" -__date__ = "27 March 2005" +__version__ = "0.4.9.7" +__date__ = "07 October 2005" #--------------------------------------------------------------------------- # Miscellaneous module data @@ -266,10 +266,12 @@ if not hasattr(types, "UnicodeType"): #if no 
unicode support... msg = str(self.msg) else: - try: - msg = str(self.msg) - except UnicodeError: - msg = self.msg #Defer encoding till later + msg = self.msg + if type(msg) not in (types.UnicodeType, types.StringType): + try: + msg = str(self.msg) + except UnicodeError: + msg = self.msg #Defer encoding till later if self.args: msg = msg % self.args return msg From jhylton at users.sourceforge.net Fri Oct 7 20:42:54 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Fri, 7 Oct 2005 20:42:54 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.110, 1.1.2.111 Message-ID: <20051007184254.666F51E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6237 Modified Files: Tag: ast-branch newcompile.c Log Message: Fix a small bug in test_dis and add some high-level comments. One failure in test_dis was shallow. test_dis() failed because the compiler generated a redundant LOAD_CONST None / RETURN_VALUE block. The assembler() now checks the b_return flag of the final block before emitting a new RETURN_VALUE opcode. Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.110 retrieving revision 1.1.2.111 diff -u -d -r1.1.2.110 -r1.1.2.111 --- newcompile.c 28 Jul 2005 05:50:01 -0000 1.1.2.110 +++ newcompile.c 7 Oct 2005 18:42:50 -0000 1.1.2.111 @@ -1,3 +1,19 @@ +/* + * This file compiles an abstract syntax tree (AST) into Python bytecode. + * + * The primary entry point is PyAST_Compile(), which returns a + * PyCodeObject. The compiler makes several passes to build the code + * object: + * 1. Checks for future statements. See future.c + * 2. Builds a symbol table. See symtable.c. + * 3. Generate code for basic blocks. See compiler_mod() in this file. + * 4. Assemble the basic blocks into final code. 
See assemble() in + * this file. + * + * Note that compiler_mod() suggests module, but the module ast type + * (mod_ty) has cases for expressions and interactive statements. + */ + #include "Python.h" #include "Python-ast.h" @@ -134,6 +150,13 @@ has been generated with current lineno */ }; +/* This struct captures the global state of a compilation. + + The u pointer points to the current compilation unit, while units + for enclosing blocks are stored in c_stack. The u and c_stack are + managed by compiler_enter_scope() and compiler_exit_scope(). +*/ + struct compiler { const char *c_filename; struct symtable *c_st; @@ -143,9 +166,9 @@ int c_interactive; int c_nestlevel; - struct compiler_unit *u; - PyObject *c_stack; - char *c_encoding; /* source encoding (a borrowed reference) */ + struct compiler_unit *u; /* compiler state for current block */ + PyObject *c_stack; /* Python list holding compiler_unit ptrs */ + char *c_encoding; /* source encoding (a borrowed reference) */ }; struct assembler { @@ -3559,10 +3582,12 @@ XXX NEXT_BLOCK() isn't quite right, because if the last block ends with a jump or return b_next shouldn't set. */ - NEXT_BLOCK(c); - if (addNone) - ADDOP_O(c, LOAD_CONST, Py_None, consts); - ADDOP(c, RETURN_VALUE); + if (!c->u->u_curblock->b_return) { + NEXT_BLOCK(c); + if (addNone) + ADDOP_O(c, LOAD_CONST, Py_None, consts); + ADDOP(c, RETURN_VALUE); + } nblocks = 0; entryblock = NULL; From galvescarvalho at yahoo.com.br Sat Oct 8 17:21:55 2005 From: galvescarvalho at yahoo.com.br (Geraldo Alves C.) Date: Sat, 8 Oct 2005 12:21:55 -0300 Subject: [Python-checkins] 2.000 Modelos de Cartas Comerciais, Modelos de Propostas, contratos e Documentos Message-ID: <20051008152210.08BAE1E4006@bag.python.org> Os melhores Modelos de Contratos e Cartas Comerciais. 
Como escrever uma proposta, ducumento ou carta comercial: Visite agora: http://www.gueb.de/modelosdecartascomerciais Modelos de cartas de agradecimento, modelos de cartas de demiss?o, modelos de cartas com mensagem mensagens de p?sames, modelos de convites, modelos de declara??es, modelos de cartas de solicita??o de empregos, modelos de cartas de cobran?as, modelos de cartas de recomenda??o, modelos de cartas de recomenda??es, redigir cartas comerciais, modelos de propostas comerciais, modelos de respostas a propostas comerciais, dicas de como redigir, dicas de reda??o de cartas comerciais, modelos de pedidos, modelos de atestados m?dicos. Modelos de cartas em ingl?s, modelos de cartas de reclama??o, modelos de cartas de refer?ncias, dicas de como escrever cartas comerciais, dicas de como redigir cartas comerciais, modelos de convites para festas, modelos de convites para eventos, modelos de cartas de pedidos de demiss?o, modelos de cartas de felicita??es, modelos de cartas formais, modelos de recibos, proposta de presta??o de servi?os, Modelos de Contratos Como escrever um curr?culo, como redigir documentos: Visite agora: http://www.gueb.de/modelosdecartascomerciais Modelos de cartas Convites, Propostas, Atas, Contratos, Agradecimentos, Empregos, Solicita??o, Solicita??es, Apresenta??o, Apresenta??es, Demiss?o, Demiss?es, Cobran?as, Pedidos, Atestados, Declara??es, Declara??o, P?sames, Condol?ncias, Batizados, Fomal, Formais, Redigir, Escrever, Escritas, Textos, Mensagens, Mensagem, Ingl?s, Clientes, Fornecedores, Empresas, Neg?cios, Marketing, Comunicados, Comunica??o, Secret?rias, Amigos, Parentes, Vendedores, Vendedoras, Refer?ncias, Social, Sociais, Respostas, Dicas, M?dicos, Advogados, Duplicatas, Felicita??o, Felicita??es, Recomenda??o, Recomenda??es, Funcion?rios, Reclama??o, Reclama??es, Comunicar, Comunicados, Documentos, Viagens, Viagem, Solenidades, Confraterniza??o, Confraterniza??es, Resultados, Pre?os, Aumentos, Entregas, F?rias, Estudantes, 
Lan?amentos, Novos, Aviso Pr?vio, Est?gios, Escolares, Protestos, D?bitos, Exposi??o, Exposi??es, Feiras, Acordos, Recibos, Procura??es, Edital, Editais" modelos de cartas de demiss?o, modelos de cartas com mensagem mensagens de p?sames, modelos de convites, modelos de declara??es, modelos de cartas de solicita??o de empregos, modelos de cartas de cobran?as, modelos de cartas de recomenda??o, modelos de cartas de recomenda??es, redigir cartas comerciais, modelos de propostas comerciais, modelos de respostas a propostas comerciais, dicas de como redigir, dicas de reda??o de cartas comerciais, modelos de pedidos. Visite agora: http://www.gueb.de/modelosdecartascomerciais modelos de cartas de demiss?o, modelos de cartas com mensagem mensagens de p?sames, modelos de convites, modelos de declara??es, modelos de cartas de solicita??o de empregos, modelos de cartas de cobran?as, modelos de cartas de recomenda??o, modelos de cartas de recomenda??es, redigir cartas comerciais, modelos de propostas comerciais, modelos de respostas a propostas comerciais, dicas de como redigir, dicas de reda??o de cartas comerciais, modelos de pedidos, modelos de atestados m?dicos. Modelos de cartas em ingl?s, modelos de cartas de reclama??o, modelos de cartas de refer?ncias, dicas de como escrever cartas comerciais, dicas de como redigir cartas comerciais, modelos de convites para festas, modelos de convites para eventos, modelos de cartas de pedidos de demiss?o, modelos de cartas de felicita??es, modelos de cartas formais, modelos de recibos, proposta de presta??o de servi?os, Modelos de Contratos Cartas Comerciais, cartas, contartos, modelos, empresa, contabilidade, advocacia, advogado, direito, procura??o, memorando. http://www.gueb.de/modelosdecartascomerciais propostas comerciais, dicas de como redigir, dicas de reda??o de cartas comerciais, modelos de pedidos, modelos de atestados m?dicos. 
Modelos de cartas em ingl?s, modelos de cartas de reclama??o, modelos de cartas de refer?ncias, dicas de como escrever cartas comerciais, dicas de como redigir cartas comerciais, modelos de convites para From gvanrossum at users.sourceforge.net Sat Oct 8 22:04:40 2005 From: gvanrossum at users.sourceforge.net (gvanrossum@users.sourceforge.net) Date: Sat, 8 Oct 2005 22:04:40 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_cmd_line.py, 1.1, 1.2 Message-ID: <20051008200440.52F061E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7933 Modified Files: test_cmd_line.py Log Message: Fix unit test failure -- the output received from Python can be empty, but verify_valid_flag() wasn't expecting that. Will backport. Index: test_cmd_line.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_cmd_line.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- test_cmd_line.py 3 Oct 2005 00:54:57 -0000 1.1 +++ test_cmd_line.py 8 Oct 2005 20:04:36 -0000 1.2 @@ -17,7 +17,7 @@ def verify_valid_flag(self, cmd_line): data = self.start_python(cmd_line) - self.assertTrue(data.endswith('\n')) + self.assertTrue(data == '' or data.endswith('\n')) self.assertTrue('Traceback' not in data) def test_environment(self): From gvanrossum at users.sourceforge.net Sat Oct 8 22:04:58 2005 From: gvanrossum at users.sourceforge.net (gvanrossum@users.sourceforge.net) Date: Sat, 8 Oct 2005 22:04:58 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_cmd_line.py, 1.1.2.2, 1.1.2.3 Message-ID: <20051008200458.854F11E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7994 Modified Files: Tag: release24-maint test_cmd_line.py Log Message: Fix unit test failure -- the output received from Python can be empty, but 
verify_valid_flag() wasn't expecting that. (Backport.) Index: test_cmd_line.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_cmd_line.py,v retrieving revision 1.1.2.2 retrieving revision 1.1.2.3 diff -u -d -r1.1.2.2 -r1.1.2.3 --- test_cmd_line.py 3 Oct 2005 01:03:46 -0000 1.1.2.2 +++ test_cmd_line.py 8 Oct 2005 20:04:55 -0000 1.1.2.3 @@ -17,7 +17,7 @@ def verify_valid_flag(self, cmd_line): data = self.start_python(cmd_line) - self.assertTrue(data.endswith('\n')) + self.assertTrue(data == '' or data.endswith('\n')) self.assertTrue('Traceback' not in data) def test_environment(self): From birkenfeld at users.sourceforge.net Sat Oct 8 22:47:41 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Sat, 8 Oct 2005 22:47:41 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib webbrowser.py,1.39,1.40 Message-ID: <20051008204741.A93BB1E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16413/Lib Modified Files: webbrowser.py Log Message: Fix errors in _synthesize because of missing basename attribute of browser controller classes. Index: webbrowser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/webbrowser.py,v retrieving revision 1.39 retrieving revision 1.40 diff -u -d -r1.39 -r1.40 --- webbrowser.py 4 Oct 2005 03:17:49 -0000 1.39 +++ webbrowser.py 8 Oct 2005 20:47:38 -0000 1.40 @@ -133,6 +133,7 @@ def __init__(self, name=""): self.name = name + self.basename = name def open(self, url, new=0, autoraise=1): raise NotImplementedError @@ -150,6 +151,7 @@ def __init__(self, cmd): self.name, self.args = cmd.split(None, 1) + self.basename = os.path.basename(self.name) def open(self, url, new=0, autoraise=1): assert "'" not in url @@ -358,8 +360,10 @@ commd + " '%s' >/dev/null &")) # Konqueror/kfm, the KDE browser. 
- if _iscommand("kfm") or _iscommand("konqueror"): - register("kfm", Konqueror, Konqueror()) + if _iscommand("kfm"): + register("kfm", Konqueror, Konqueror("kfm")) + elif _iscommand("konqueror"): + register("konqueror", Konqueror, Konqueror("konqueror")) # Gnome's Galeon and Epiphany for browser in ("galeon", "epiphany"): From doerwalter at users.sourceforge.net Sun Oct 9 21:28:40 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Sun, 9 Oct 2005 21:28:40 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1387,1.1388 Message-ID: <20051009192840.54D041E4009@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv436/Misc Modified Files: NEWS Log Message: Fix indentation. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1387 retrieving revision 1.1388 diff -u -d -r1.1387 -r1.1388 --- NEWS 6 Oct 2005 20:29:57 -0000 1.1387 +++ NEWS 9 Oct 2005 19:28:35 -0000 1.1388 @@ -29,7 +29,7 @@ represented as a C int, raise OverflowError. - test__locale is skipped on OS X < 10.4 (only partial locale support is -present). + present). - SF bug #893549: parsing keyword arguments was broken with a few format codes. From doerwalter at users.sourceforge.net Sun Oct 9 21:38:24 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Sun, 9 Oct 2005 21:38:24 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1388,1.1389 Message-ID: <20051009193824.95A5A1E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3159/Misc Modified Files: NEWS Log Message: Remove trailing spaces. 
Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1388 retrieving revision 1.1389 diff -u -d -r1.1388 -r1.1389 --- NEWS 9 Oct 2005 19:28:35 -0000 1.1388 +++ NEWS 9 Oct 2005 19:38:21 -0000 1.1389 @@ -194,7 +194,7 @@ - Bug #728515: mmap.resize() now resizes the file on Unix as it did on Windows. -- Patch #1180695: Add nanosecond stat resolution, and st_gen, +- Patch #1180695: Add nanosecond stat resolution, and st_gen, st_birthtime for FreeBSD. - Patch #1231069: The fcntl.ioctl function now uses the 'I' code for @@ -256,7 +256,7 @@ - Patch #754022: Greatly enhanced webbrowser.py (by Oleg Broytmann). -- Bug #729103: pydoc.py: Fix docother() method to accept additional +- Bug #729103: pydoc.py: Fix docother() method to accept additional "parent" argument. - Patch #1300515: xdrlib.py: Fix pack_fstring() to really use null bytes @@ -293,7 +293,7 @@ - Bug #1178484: Return complete lines from codec stream readers even if there is an exception in later lines, resulting in - correct line numbers for decoding errors in source code. + correct line numbers for decoding errors in source code. - Bug #1192315: Disallow negative arguments to clear() in pdb. From doerwalter at users.sourceforge.net Sun Oct 9 21:41:23 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Sun, 9 Oct 2005 21:41:23 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings aliases.py, 1.28, 1.29 Message-ID: <20051009194123.C0C361E4009@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3739/Lib/encodings Modified Files: aliases.py Log Message: Bug #1245379: Add "unicode-1-1-utf-7" as an alias for "utf-7" as specified by RFC 1642. 
Index: aliases.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/aliases.py,v retrieving revision 1.28 retrieving revision 1.29 diff -u -d -r1.28 -r1.29 --- aliases.py 10 Dec 2004 21:54:35 -0000 1.28 +++ aliases.py 9 Oct 2005 19:41:19 -0000 1.29 @@ -482,6 +482,7 @@ # utf_7 codec 'u7' : 'utf_7', 'utf7' : 'utf_7', + 'unicode_1_1_utf_7' : 'utf_7', # utf_8 codec 'u8' : 'utf_8', From doerwalter at users.sourceforge.net Sun Oct 9 21:41:23 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Sun, 9 Oct 2005 21:41:23 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libcodecs.tex,1.36,1.37 Message-ID: <20051009194123.C24891E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3739/Doc/lib Modified Files: libcodecs.tex Log Message: Bug #1245379: Add "unicode-1-1-utf-7" as an alias for "utf-7" as specified by RFC 1642. 
Index: libcodecs.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcodecs.tex,v retrieving revision 1.36 retrieving revision 1.37 diff -u -d -r1.36 -r1.37 --- libcodecs.tex 24 Aug 2005 07:38:12 -0000 1.36 +++ libcodecs.tex 9 Oct 2005 19:41:20 -0000 1.37 @@ -883,7 +883,7 @@ {all languages (BMP only)} \lineiii{utf_7} - {U7} + {U7, unicode-1-1-utf-7} {all languages} \lineiii{utf_8} From doerwalter at users.sourceforge.net Sun Oct 9 21:42:31 2005 From: doerwalter at users.sourceforge.net (doerwalter@users.sourceforge.net) Date: Sun, 9 Oct 2005 21:42:31 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1389,1.1390 Message-ID: <20051009194231.68DE21E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4070/Misc Modified Files: NEWS Log Message: Bug #1245379: Add "unicode-1-1-utf-7" as an alias for "utf-7" as specified by RFC 1642. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1389 retrieving revision 1.1390 diff -u -d -r1.1389 -r1.1390 --- NEWS 9 Oct 2005 19:38:21 -0000 1.1389 +++ NEWS 9 Oct 2005 19:42:27 -0000 1.1390 @@ -517,6 +517,8 @@ - Bug #1202493: Fixing SRE parser to handle '{}' as perl does, rather than considering it exactly like a '*'. +- Bug #1245379: Add "unicode-1-1-utf-7" as an alias for "utf-7" to + ``encodings.aliases``. 
Build ----- From kbk at users.sourceforge.net Mon Oct 10 02:05:36 2005 From: kbk at users.sourceforge.net (kbk@users.sourceforge.net) Date: Mon, 10 Oct 2005 02:05:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/idlelib CallTipWindow.py, 1.7, 1.7.14.1 CallTips.py, 1.12, 1.12.6.1 EditorWindow.py, 1.69, 1.69.2.1 ParenMatch.py, 1.8, 1.8.6.1 PyParse.py, 1.5, 1.5.14.1 PyShell.py, 1.99, 1.99.2.1 config-extensions.def, 1.15, 1.15.6.1 configDialog.py, 1.62, 1.62.2.1 run.py, 1.32, 1.32.2.1 Message-ID: <20051010000536.0E73E1E4074@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/idlelib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22453 Modified Files: Tag: IDLE-syntax-branch CallTipWindow.py CallTips.py EditorWindow.py ParenMatch.py PyParse.py PyShell.py config-extensions.def configDialog.py run.py Log Message: Noam Raphael 'syntax' patch 10Jul05 Index: CallTipWindow.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/CallTipWindow.py,v retrieving revision 1.7 retrieving revision 1.7.14.1 diff -u -d -r1.7 -r1.7.14.1 --- CallTipWindow.py 31 Dec 2002 15:59:14 -0000 1.7 +++ CallTipWindow.py 10 Oct 2005 00:05:30 -0000 1.7.14.1 @@ -6,33 +6,65 @@ """ from Tkinter import * +HIDE_VIRTUAL_EVENT_NAME = "<<calltipwindow-hide>>" +HIDE_SEQUENCES = ("<Key-Escape>", "<FocusOut>") +CHECKHIDE_VIRTUAL_EVENT_NAME = "<<calltipwindow-checkhide>>" +CHECKHIDE_SEQUENCES = ("<KeyRelease>", "<ButtonRelease>") +CHECKHIDE_TIME = 100 # miliseconds + +MARK_RIGHT = "calltipwindowregion_right" + class CallTip: def __init__(self, widget): self.widget = widget - self.tipwindow = None - self.id = None - self.x = self.y = 0 + self.tipwindow = self.label = None + self.parenline = self.parencol = None + self.lastline = None + self.hideid = self.checkhideid = None - def showtip(self, text): - " Display text in calltip window" + def position_window(self): + """Check if needs to reposition the window, and if so - do it.""" + curline = int(self.widget.index("insert").split('.')[0]) + if curline ==
self.lastline: + return + self.lastline = curline + self.widget.see("insert") + if curline == self.parenline: + box = self.widget.bbox("%d.%d" % (self.parenline, + self.parencol)) + else: + box = self.widget.bbox("%d.0" % curline) + if not box: + box = list(self.widget.bbox("insert")) + # align to left of window + box[0] = 0 + box[2] = 0 + x = box[0] + self.widget.winfo_rootx() + 2 + y = box[1] + box[3] + self.widget.winfo_rooty() + self.tipwindow.wm_geometry("+%d+%d" % (x, y)) + + def showtip(self, text, parenleft, parenright): + """Show the calltip, bind events which will close it and reposition it. + """ # truncate overly long calltip if len(text) >= 79: text = text[:75] + ' ...' self.text = text if self.tipwindow or not self.text: return - self.widget.see("insert") - x, y, cx, cy = self.widget.bbox("insert") - x = x + self.widget.winfo_rootx() + 2 - y = y + cy + self.widget.winfo_rooty() + + self.widget.mark_set(MARK_RIGHT, parenright) + self.parenline, self.parencol = map( + int, self.widget.index(parenleft).split(".")) + self.tipwindow = tw = Toplevel(self.widget) + self.position_window() # XXX 12 Dec 2002 KBK The following command has two effects: It removes # the calltip window border (good) but also causes (at least on # Linux) the calltip to show as a top level window, burning through # any other window dragged over it. Also, shows on all viewports! 
tw.wm_overrideredirect(1) - tw.wm_geometry("+%d+%d" % (x, y)) try: # This command is only needed and available on Tk >= 8.4.0 for OSX # Without it, call tips intrude on the typing process by grabbing @@ -41,16 +73,66 @@ "help", "noActivates") except TclError: pass - label = Label(tw, text=self.text, justify=LEFT, - background="#ffffe0", relief=SOLID, borderwidth=1, - font = self.widget['font']) - label.pack() + self.label = Label(tw, text=self.text, justify=LEFT, + background="#ffffe0", relief=SOLID, borderwidth=1, + font = self.widget['font']) + self.label.pack() + + self.checkhideid = self.widget.bind(CHECKHIDE_VIRTUAL_EVENT_NAME, + self.checkhide_event) + for seq in CHECKHIDE_SEQUENCES: + self.widget.event_add(CHECKHIDE_VIRTUAL_EVENT_NAME, seq) + self.widget.after(CHECKHIDE_TIME, self.checkhide_event) + self.hideid = self.widget.bind(HIDE_VIRTUAL_EVENT_NAME, + self.hide_event) + for seq in HIDE_SEQUENCES: + self.widget.event_add(HIDE_VIRTUAL_EVENT_NAME, seq) + + def checkhide_event(self, event=None): + if not self.tipwindow: + # If the event was triggered by the same event that unbinded + # this function, the function will be called nevertheless, + # so do nothing in this case. + return + curline, curcol = map(int, self.widget.index("insert").split('.')) + if curline < self.parenline or \ + (curline == self.parenline and curcol <= self.parencol) or \ + self.widget.compare("insert", ">", MARK_RIGHT): + self.hidetip() + else: + self.position_window() + self.widget.after(CHECKHIDE_TIME, self.checkhide_event) + + def hide_event(self, event): + if not self.tipwindow: + # See the explanation in checkhide_event. 
+ return + self.hidetip() def hidetip(self): - tw = self.tipwindow + if not self.tipwindow: + return + + for seq in CHECKHIDE_SEQUENCES: + self.widget.event_delete(CHECKHIDE_VIRTUAL_EVENT_NAME, seq) + self.widget.unbind(CHECKHIDE_VIRTUAL_EVENT_NAME, self.checkhideid) + self.checkhideid = None + for seq in HIDE_SEQUENCES: + self.widget.event_delete(HIDE_VIRTUAL_EVENT_NAME, seq) + self.widget.unbind(HIDE_VIRTUAL_EVENT_NAME, self.hideid) + self.hideid = None + + self.label.destroy() + self.label = None + self.tipwindow.destroy() self.tipwindow = None - if tw: - tw.destroy() + + self.widget.mark_unset(MARK_RIGHT) + self.parenline = self.parencol = self.lastline = None + + def is_active(self): + return bool(self.tipwindow) + ############################### Index: CallTips.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/CallTips.py,v retrieving revision 1.12 retrieving revision 1.12.6.1 diff -u -d -r1.12 -r1.12.6.1 --- CallTips.py 4 May 2004 08:34:56 -0000 1.12 +++ CallTips.py 10 Oct 2005 00:05:30 -0000 1.12.6.1 @@ -3,21 +3,21 @@ Call Tips are floating windows which display function, class, and method parameter and docstring information when you type an opening parenthesis, and which disappear when you type a closing parenthesis. - -Future plans include extending the functionality to include class attributes. 
- """ import sys -import string import types import CallTipWindow +from HyperParser import HyperParser import __main__ class CallTips: menudefs = [ + ('edit', [ + ("Show call tip", "<<force-open-calltip>>"), + ]) ] def __init__(self, editwin=None): @@ -36,51 +36,47 @@ # See __init__ for usage return CallTipWindow.CallTip(self.text) - def _remove_calltip_window(self): + def _remove_calltip_window(self, event=None): if self.calltip: self.calltip.hidetip() self.calltip = None - def paren_open_event(self, event): - self._remove_calltip_window() - name = self.get_name_at_cursor() - arg_text = self.fetch_tip(name) - if arg_text: - self.calltip_start = self.text.index("insert") - self.calltip = self._make_calltip_window() - self.calltip.showtip(arg_text) - return "" #so the event is handled normally. + def force_open_calltip_event(self, event): + """Happens when the user really wants to open a CallTip, even if a + function call is needed. + """ + self.open_calltip(True) - def paren_close_event(self, event): - # Now just hides, but later we should check if other - # paren'd expressions remain open. - self._remove_calltip_window() - return "" #so the event is handled normally. + def try_open_calltip_event(self, event): + """Happens when it would be nice to open a CallTip, but not really + neccesary, for example after an opening bracket, so function calls + won't be made. + """ + self.open_calltip(False) - def check_calltip_cancel_event(self, event): - if self.calltip: - # If we have moved before the start of the calltip, - # or off the calltip line, then cancel the tip. - # (Later need to be smarter about multi-line, etc) - if self.text.compare("insert", "<=", self.calltip_start) or \ - self.text.compare("insert", ">", self.calltip_start - + " lineend"): - self._remove_calltip_window() - return "" #so the event is handled normally. + def refresh_calltip_event(self, event): + """If there is already a calltip window, check if it is still needed, + and if so, reload it. 
+ """ + if self.calltip and self.calltip.is_active(): + self.open_calltip(False) - def calltip_cancel_event(self, event): + def open_calltip(self, evalfuncs): self._remove_calltip_window() - return "" #so the event is handled normally. - - __IDCHARS = "._" + string.ascii_letters + string.digits - def get_name_at_cursor(self): - idchars = self.__IDCHARS - str = self.text.get("insert linestart", "insert") - i = len(str) - while i and str[i-1] in idchars: - i -= 1 - return str[i:] + hp = HyperParser(self.editwin, "insert") + sur_paren = hp.get_surrounding_brackets('(') + if not sur_paren: + return + hp.set_index(sur_paren[0]) + name = hp.get_expression() + if not name or (not evalfuncs and name.find('(') != -1): + return + arg_text = self.fetch_tip(name) + if not arg_text: + return + self.calltip = self._make_calltip_window() + self.calltip.showtip(arg_text, sur_paren[0], sur_paren[1]) def fetch_tip(self, name): """Return the argument list and docstring of a function or class @@ -127,7 +123,7 @@ return None def get_arg_text(ob): - "Get a string describing the arguments for the given object" + """Get a string describing the arguments for the given object""" argText = "" if ob is not None: argOffset = 0 @@ -150,7 +146,7 @@ try: realArgs = fob.func_code.co_varnames[argOffset:fob.func_code.co_argcount] defaults = fob.func_defaults or [] - defaults = list(map(lambda name: "=%s" % name, defaults)) + defaults = list(map(lambda name: "=%s" % repr(name), defaults)) defaults = [""] * (len(realArgs)-len(defaults)) + defaults items = map(lambda arg, dflt: arg+dflt, realArgs, defaults) if fob.func_code.co_flags & 0x4: Index: EditorWindow.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/EditorWindow.py,v retrieving revision 1.69 retrieving revision 1.69.2.1 diff -u -d -r1.69 -r1.69.2.1 --- EditorWindow.py 12 Jun 2005 05:19:23 -0000 1.69 +++ EditorWindow.py 10 Oct 2005 00:05:30 -0000 1.69.2.1 @@ -6,6 +6,7 @@ 
from Tkinter import * import tkSimpleDialog import tkMessageBox +from MultiCall import MultiCallCreator import webbrowser import idlever @@ -89,7 +90,8 @@ self.vbar = vbar = Scrollbar(top, name='vbar') self.text_frame = text_frame = Frame(top) self.width = idleConf.GetOption('main','EditorWindow','width') - self.text = text = Text(text_frame, name='text', padx=5, wrap='none', + self.text = text = MultiCallCreator(Text)( + text_frame, name='text', padx=5, wrap='none', foreground=idleConf.GetHighlight(currentTheme, 'normal',fgBg='fg'), background=idleConf.GetHighlight(currentTheme, @@ -264,8 +266,9 @@ self.status_bar.set_label('column', 'Col: ?', side=RIGHT) self.status_bar.set_label('line', 'Ln: ?', side=RIGHT) self.status_bar.pack(side=BOTTOM, fill=X) - self.text.bind('', self.set_line_and_column) - self.text.bind('', self.set_line_and_column) + self.text.bind("<>", self.set_line_and_column) + self.text.event_add("<>", + "", "") self.text.after_idle(self.set_line_and_column) def set_line_and_column(self, event=None): @@ -355,6 +358,9 @@ return "break" def copy(self,event): + if not self.text.tag_ranges("sel"): + # There is no selection, so do nothing and maybe interrupt. + return self.text.event_generate("<>") return "break" @@ -557,14 +563,28 @@ idleConf.GetOption('main','EditorWindow','font-size'), fontWeight)) - def ResetKeybindings(self): - "Update the keybindings if they are changed" + def RemoveKeybindings(self): + "Remove the keybindings before they are changed." 
# Called from configDialog.py self.Bindings.default_keydefs=idleConf.GetCurrentKeySet() keydefs = self.Bindings.default_keydefs for event, keylist in keydefs.items(): - self.text.event_delete(event) + self.text.event_delete(event, *keylist) + for extensionName in self.get_standard_extension_names(): + keydefs = idleConf.GetExtensionBindings(extensionName) + if keydefs: + for event, keylist in keydefs.items(): + self.text.event_delete(event, *keylist) + + def ApplyKeybindings(self): + "Update the keybindings after they are changed" + # Called from configDialog.py + self.Bindings.default_keydefs=idleConf.GetCurrentKeySet() self.apply_bindings() + for extensionName in self.get_standard_extension_names(): + keydefs = idleConf.GetExtensionBindings(extensionName) + if keydefs: + self.apply_bindings(keydefs) #update menu accelerators menuEventDict={} for menu in self.Bindings.menudefs: @@ -1064,17 +1084,28 @@ # open/close first need to find the last stmt lno = index2line(text.index('insert')) y = PyParse.Parser(self.indentwidth, self.tabwidth) - for context in self.num_context_lines: - startat = max(lno - context, 1) - startatindex = repr(startat) + ".0" + if not self.context_use_ps1: + for context in self.num_context_lines: + startat = max(lno - context, 1) + startatindex = `startat` + ".0" + rawtext = text.get(startatindex, "insert") + y.set_str(rawtext) + bod = y.find_good_parse_start( + self.context_use_ps1, + self._build_char_in_string_func(startatindex)) + if bod is not None or startat == 1: + break + y.set_lo(bod or 0) + else: + r = text.tag_prevrange("console", "insert") + if r: + startatindex = r[1] + else: + startatindex = "1.0" rawtext = text.get(startatindex, "insert") y.set_str(rawtext) - bod = y.find_good_parse_start( - self.context_use_ps1, - self._build_char_in_string_func(startatindex)) - if bod is not None or startat == 1: - break - y.set_lo(bod or 0) + y.set_lo(0) + c = y.get_continuation_type() if c != PyParse.C_NONE: # The current stmt hasn't ended 
yet. Index: ParenMatch.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/ParenMatch.py,v retrieving revision 1.8 retrieving revision 1.8.6.1 diff -u -d -r1.8 -r1.8.6.1 --- ParenMatch.py 12 Feb 2004 17:35:09 -0000 1.8 +++ ParenMatch.py 10 Oct 2005 00:05:30 -0000 1.8.6.1 @@ -3,17 +3,14 @@ When you hit a right paren, the cursor should move briefly to the left paren. Paren here is used generically; the matching applies to parentheses, square brackets, and curly braces. - -WARNING: This extension will fight with the CallTips extension, -because they both are interested in the KeyRelease-parenright event. -We'll have to fix IDLE to do something reasonable when two or more -extensions what to capture the same event. """ -import PyParse -from EditorWindow import EditorWindow, index2line +from HyperParser import HyperParser from configHandler import idleConf +keysym_opener = {"parenright":'(', "bracketright":'[', "braceright":'{'} +CHECK_DELAY = 100 # miliseconds + class ParenMatch: """Highlight matching parentheses @@ -31,7 +28,6 @@ expression from the left paren to the right paren. TODO: - - fix interaction with CallTips - extend IDLE with configuration dialog to change options - implement rest of Emacs highlight styles (see below) - print mismatch warning in IDLE status window @@ -41,7 +37,11 @@ to the right of a right paren. I don't know how to do that in Tk, so I haven't bothered. """ - menudefs = [] + menudefs = [ + ('edit', [ + ("Show surrounding parens", "<<flash-paren>>"), + ]) + ] STYLE = idleConf.GetOption('extensions','ParenMatch','style', default='expression') FLASH_DELAY = idleConf.GetOption('extensions','ParenMatch','flash-delay', @@ -50,14 +50,36 @@ BELL = idleConf.GetOption('extensions','ParenMatch','bell', type='bool',default=1) + RESTORE_VIRTUAL_EVENT_NAME = "<<parenmatch-check-restore>>" + # We want the restore event be called before the usual return and + # backspace events. 
+ RESTORE_SEQUENCES = ("<KeyPress>", "<ButtonPress>", + "<Key-Return>", "<Key-BackSpace>") + def __init__(self, editwin): self.editwin = editwin self.text = editwin.text - self.finder = LastOpenBracketFinder(editwin) + # Bind the check-restore event to the function restore_event, + # so that we can then use activate_restore (which calls event_add) + # and deactivate_restore (which calls event_delete). + editwin.text.bind(self.RESTORE_VIRTUAL_EVENT_NAME, + self.restore_event) self.counter = 0 - self._restore = None + self.is_restore_active = 0 self.set_style(self.STYLE) + def activate_restore(self): + if not self.is_restore_active: + for seq in self.RESTORE_SEQUENCES: + self.text.event_add(self.RESTORE_VIRTUAL_EVENT_NAME, seq) + self.is_restore_active = True + + def deactivate_restore(self): + if self.is_restore_active: + for seq in self.RESTORE_SEQUENCES: + self.text.event_delete(self.RESTORE_VIRTUAL_EVENT_NAME, seq) + self.is_restore_active = False + def set_style(self, style): self.STYLE = style if style == "default": @@ -67,23 +89,38 @@ self.create_tag = self.create_tag_expression self.set_timeout = self.set_timeout_none - def flash_open_paren_event(self, event): - index = self.finder.find(keysym_type(event.keysym)) - if index is None: + def flash_paren_event(self, event): + indices = HyperParser(self.editwin, "insert").get_surrounding_brackets() + if indices is None: self.warn_mismatched() return - self._restore = 1 - self.create_tag(index) + self.activate_restore() + self.create_tag(indices) + self.set_timeout_last() + + def paren_closed_event(self, event): + # If it was a shortcut and not really a closing paren, quit. 
+ if self.text.get("insert-1c") not in (')',']','}'): + return + hp = HyperParser(self.editwin, "insert-1c") + if not hp.is_in_code(): + return + indices = hp.get_surrounding_brackets(keysym_opener[event.keysym], True) + if indices is None: + self.warn_mismatched() + return + self.activate_restore() + self.create_tag(indices) self.set_timeout() - def check_restore_event(self, event=None): - if self._restore: - self.text.tag_delete("paren") - self._restore = None + def restore_event(self, event=None): + self.text.tag_delete("paren") + self.deactivate_restore() + self.counter += 1 # disable the last timer, if there is one. def handle_restore_timer(self, timer_count): - if timer_count + 1 == self.counter: - self.check_restore_event() + if timer_count == self.counter: + self.restore_event() def warn_mismatched(self): if self.BELL: @@ -92,87 +129,44 @@ # any one of the create_tag_XXX methods can be used depending on # the style - def create_tag_default(self, index): + def create_tag_default(self, indices): """Highlight the single paren that matches""" - self.text.tag_add("paren", index) + self.text.tag_add("paren", indices[0]) self.text.tag_config("paren", self.HILITE_CONFIG) - def create_tag_expression(self, index): + def create_tag_expression(self, indices): """Highlight the entire expression""" - self.text.tag_add("paren", index, "insert") + if self.text.get(indices[1]) in (')', ']', '}'): + rightindex = indices[1]+"+1c" + else: + rightindex = indices[1] + self.text.tag_add("paren", indices[0], rightindex) self.text.tag_config("paren", self.HILITE_CONFIG) # any one of the set_timeout_XXX methods can be used depending on # the style def set_timeout_none(self): - """Highlight will remain until user input turns it off""" - pass + """Highlight will remain until user input turns it off + or the insert has moved""" + # After CHECK_DELAY, call a function which disables the "paren" tag + # if the event is for the most recent timer and the insert has changed, + # or schedules 
another call for itself. + self.counter += 1 + def callme(callme, self=self, c=self.counter, + index=self.text.index("insert")): + if index != self.text.index("insert"): + self.handle_restore_timer(c) + else: + self.editwin.text_frame.after(CHECK_DELAY, callme, callme) + self.editwin.text_frame.after(CHECK_DELAY, callme, callme) def set_timeout_last(self): """The last highlight created will be removed after .5 sec""" # associate a counter with an event; only disable the "paren" # tag if the event is for the most recent timer. + self.counter += 1 self.editwin.text_frame.after(self.FLASH_DELAY, lambda self=self, c=self.counter: \ self.handle_restore_timer(c)) - self.counter = self.counter + 1 - -def keysym_type(ks): - # Not all possible chars or keysyms are checked because of the - # limited context in which the function is used. - if ks == "parenright" or ks == "(": - return "paren" - if ks == "bracketright" or ks == "[": - return "bracket" - if ks == "braceright" or ks == "{": - return "brace" - -class LastOpenBracketFinder: - num_context_lines = EditorWindow.num_context_lines - indentwidth = EditorWindow.indentwidth - tabwidth = EditorWindow.tabwidth - context_use_ps1 = EditorWindow.context_use_ps1 - def __init__(self, editwin): - self.editwin = editwin - self.text = editwin.text - - def _find_offset_in_buf(self, lno): - y = PyParse.Parser(self.indentwidth, self.tabwidth) - for context in self.num_context_lines: - startat = max(lno - context, 1) - startatindex = repr(startat) + ".0" - # rawtext needs to contain everything up to the last - # character, which was the close paren. 
the parser also - # requires that the last line ends with "\n" - rawtext = self.text.get(startatindex, "insert")[:-1] + "\n" - y.set_str(rawtext) - bod = y.find_good_parse_start( - self.context_use_ps1, - self._build_char_in_string_func(startatindex)) - if bod is not None or startat == 1: - break - y.set_lo(bod or 0) - i = y.get_last_open_bracket_pos() - return i, y.str - - def find(self, right_keysym_type): - """Return the location of the last open paren""" - lno = index2line(self.text.index("insert")) - i, buf = self._find_offset_in_buf(lno) - if i is None \ - or keysym_type(buf[i]) != right_keysym_type: - return None - lines_back = buf[i:].count("\n") - 1 - # subtract one for the "\n" added to please the parser - upto_open = buf[:i] - j = upto_open.rfind("\n") + 1 # offset of column 0 of line - offset = i - j - return "%d.%d" % (lno - lines_back, offset) - - def _build_char_in_string_func(self, startindex): - def inner(offset, startindex=startindex, - icis=self.editwin.is_char_in_string): - return icis(startindex + "%dc" % offset) - return inner Index: PyParse.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/PyParse.py,v retrieving revision 1.5 retrieving revision 1.5.14.1 diff -u -d -r1.5 -r1.5.14.1 --- PyParse.py 17 Sep 2002 03:55:13 -0000 1.5 +++ PyParse.py 10 Oct 2005 00:05:30 -0000 1.5.14.1 @@ -13,9 +13,7 @@ _synchre = re.compile(r""" ^ [ \t]* - (?: if - | for - | while + (?: while | else | def | return @@ -144,29 +142,11 @@ # This will be reliable iff given a reliable is_char_in_string # function, meaning that when it says "no", it's absolutely # guaranteed that the char is not in a string. - # - # Ack, hack: in the shell window this kills us, because there's - # no way to tell the differences between output, >>> etc and - # user input. Indeed, IDLE's first output line makes the rest - # look like it's in an unclosed paren!: - # Python 1.5.2 (#0, Apr 13 1999, ... 
- def find_good_parse_start(self, use_ps1, is_char_in_string=None, + def find_good_parse_start(self, is_char_in_string=None, _synchre=_synchre): str, pos = self.str, None - if use_ps1: - # shell window - ps1 = '\n' + sys.ps1 - i = str.rfind(ps1) - if i >= 0: - pos = i + len(ps1) - # make it look like there's a newline instead - # of ps1 at the start -- hacking here once avoids - # repeated hackery later - self.str = str[:pos-1] + '\n' + str[pos:] - return pos - # File window -- real work. if not is_char_in_string: # no clue -- make the caller pass everything return None @@ -355,6 +335,11 @@ # Creates: # self.stmt_start, stmt_end # slice indices of last interesting stmt + # self.stmt_bracketing + # the bracketing structure of the last interesting stmt; + # for example, for the statement "say(boo) or die", stmt_bracketing + # will be [(0, 0), (3, 1), (8, 0)]. Strings and comments are + # treated as brackets, for the matter. # self.lastch # last non-whitespace character before optional trailing # comment @@ -396,6 +381,7 @@ lastch = "" stack = [] # stack of open bracket indices push_stack = stack.append + bracketing = [(p, 0)] while p < q: # suck up all except ()[]{}'"#\\ m = _chew_ordinaryre(str, p, q) @@ -416,6 +402,7 @@ if ch in "([{": push_stack(p) + bracketing.append((p, len(stack))) lastch = ch p = p+1 continue @@ -425,6 +412,7 @@ del stack[-1] lastch = ch p = p+1 + bracketing.append((p, len(stack))) continue if ch == '"' or ch == "'": @@ -435,14 +423,18 @@ # strings to a couple of characters per line. study1 # also needed to keep track of newlines, and we don't # have to. 
+ bracketing.append((p, len(stack)+1)) lastch = ch p = _match_stringre(str, p, q).end() + bracketing.append((p, len(stack))) continue if ch == '#': # consume comment and trailing newline + bracketing.append((p, len(stack)+1)) p = str.find('\n', p, q) + 1 assert p > 0 + bracketing.append((p, len(stack))) continue assert ch == '\\' @@ -458,6 +450,7 @@ self.lastch = lastch if stack: self.lastopenbracketpos = stack[-1] + self.stmt_bracketing = tuple(bracketing) # Assuming continuation is C_BRACKET, return the number # of spaces the next line should be indented. @@ -582,3 +575,12 @@ def get_last_open_bracket_pos(self): self._study2() return self.lastopenbracketpos + + # the structure of the bracketing of the last interesting statement, + # in the format defined in _study2, or None if the text didn't contain + # anything + stmt_bracketing = None + + def get_last_stmt_bracketing(self): + self._study2() + return self.stmt_bracketing Index: PyShell.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/PyShell.py,v retrieving revision 1.99 retrieving revision 1.99.2.1 diff -u -d -r1.99 -r1.99.2.1 --- PyShell.py 1 Sep 2005 00:39:02 -0000 1.99 +++ PyShell.py 10 Oct 2005 00:05:30 -0000 1.99.2.1 @@ -1091,11 +1091,12 @@ self.recall(self.text.get(next[0], next[1]), event) return "break" # No stdin mark -- just get the current line, less any prompt - line = self.text.get("insert linestart", "insert lineend") - last_line_of_prompt = sys.ps1.split('\n')[-1] - if line.startswith(last_line_of_prompt): - line = line[len(last_line_of_prompt):] - self.recall(line, event) + indices = self.text.tag_nextrange("console", "insert linestart") + if indices and \ + self.text.compare(indices[0], "<=", "insert linestart"): + self.recall(self.text.get(indices[1], "insert lineend"), event) + else: + self.recall(self.text.get("insert linestart", "insert lineend"), event) return "break" # If we're between the beginning of the line 
and the iomark, i.e. # in the prompt area, move to the end of the prompt Index: config-extensions.def =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/config-extensions.def,v retrieving revision 1.15 retrieving revision 1.15.6.1 diff -u -d -r1.15 -r1.15.6.1 --- config-extensions.def 6 Jun 2004 01:29:22 -0000 1.15 +++ config-extensions.def 10 Oct 2005 00:05:30 -0000 1.15.6.1 @@ -52,22 +52,30 @@ [CallTips] enable=1 +[CallTips_cfgBindings] +force-open-calltip= [CallTips_bindings] -paren-open= -paren-close= -check-calltip-cancel= -calltip-cancel= +try-open-calltip= +refresh-calltip= [ParenMatch] -enable=0 +enable=1 style= expression flash-delay= 500 bell= 1 -hilite-foreground= black -hilite-background= #43cd80 +[ParenMatch_cfgBindings] +flash-paren= [ParenMatch_bindings] -flash-open-paren= -check-restore= +paren-closed= + +[AutoComplete] +enable=1 +popupwait=0 +[AutoComplete_cfgBindings] +force-open-completions= +[AutoComplete_bindings] +autocomplete= +try-open-completions= [CodeContext] enable=1 Index: configDialog.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/configDialog.py,v retrieving revision 1.62 retrieving revision 1.62.2.1 diff -u -d -r1.62 -r1.62.2.1 --- configDialog.py 22 Jul 2005 21:49:29 -0000 1.62 +++ configDialog.py 10 Oct 2005 00:05:30 -0000 1.62.2.1 @@ -1106,6 +1106,13 @@ idleConf.userCfg[configType].Save() self.ResetChangedItems() #clear the changed items dict + def DeactivateCurrentConfig(self): + #Before a config is saved, some cleanup of current + #config must be done - remove the previous keybindings + winInstances=self.parent.instance_dict.keys() + for instance in winInstances: + instance.RemoveKeybindings() + def ActivateConfigChanges(self): "Dynamically apply configuration changes" winInstances=self.parent.instance_dict.keys() @@ -1113,7 +1120,7 @@ instance.ResetColorizer() instance.ResetFont() 
instance.set_notabs_indentwidth() - instance.ResetKeybindings() + instance.ApplyKeybindings() instance.reset_help_menu_entries() def Cancel(self): @@ -1124,6 +1131,7 @@ self.destroy() def Apply(self): + self.DeactivateCurrentConfig() self.SaveAllChangedConfigs() self.ActivateConfigChanges() Index: run.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/run.py,v retrieving revision 1.32 retrieving revision 1.32.2.1 diff -u -d -r1.32 -r1.32.2.1 --- run.py 5 May 2005 23:29:54 -0000 1.32 +++ run.py 10 Oct 2005 00:05:31 -0000 1.32.2.1 @@ -9,6 +9,8 @@ import Queue import CallTips +import AutoComplete + import RemoteDebugger import RemoteObjectBrowser import StackViewer @@ -275,6 +277,7 @@ self.rpchandler = rpchandler self.locals = __main__.__dict__ self.calltip = CallTips.CallTips() + self.autocomplete = AutoComplete.AutoComplete() def runcode(self, code): try: @@ -305,6 +308,9 @@ def get_the_calltip(self, name): return self.calltip.fetch_tip(name) + def get_the_completion_list(self, what, mode): + return self.autocomplete.fetch_completions(what, mode) + def stackviewer(self, flist_oid=None): if self.usr_exc_info: typ, val, tb = self.usr_exc_info From jhylton at users.sourceforge.net Mon Oct 10 17:50:45 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Mon, 10 Oct 2005 17:50:45 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_doctest.py, 1.4.14.2, 1.4.14.3 Message-ID: <20051010155045.D9E271E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20137 Modified Files: Tag: ast-branch test_doctest.py Log Message: Compiler now generates the name "<module>" instead of "?" 
Index: test_doctest.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_doctest.py,v retrieving revision 1.4.14.2 retrieving revision 1.4.14.3 diff -u -d -r1.4.14.2 -r1.4.14.3 --- test_doctest.py 7 Jan 2005 06:58:52 -0000 1.4.14.2 +++ test_doctest.py 10 Oct 2005 15:50:41 -0000 1.4.14.3 @@ -1556,11 +1556,11 @@ >>> try: doctest.debug_src(s) ... finally: sys.stdin = real_stdin - > (1)?() + > (1)() (Pdb) next 12 --Return-- - > (1)?()->None + > (1)()->None (Pdb) print x 12 (Pdb) continue @@ -1598,7 +1598,7 @@ >>> try: runner.run(test) ... finally: sys.stdin = real_stdin --Return-- - > (1)?()->None + > (1)()->None -> import pdb; pdb.set_trace() (Pdb) print x 42 @@ -1634,7 +1634,7 @@ (Pdb) print y 2 (Pdb) up - > (1)?() + > (1)() -> calls_set_trace() (Pdb) print x 1 @@ -1683,7 +1683,7 @@ [EOF] (Pdb) next --Return-- - > (1)?()->None + > (1)()->None -> f(3) (Pdb) list 1 -> f(3) @@ -1776,7 +1776,7 @@ (Pdb) print y 1 (Pdb) up - > (1)?() + > (1)() -> calls_set_trace() (Pdb) print foo *** NameError: name 'foo' is not defined From kbk at users.sourceforge.net Mon Oct 10 19:37:51 2005 From: kbk at users.sourceforge.net (kbk@users.sourceforge.net) Date: Mon, 10 Oct 2005 19:37:51 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep-0227.txt,1.10,1.11 Message-ID: <20051010173751.011C91E400A@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21834 Modified Files: pep-0227.txt Log Message: Fix type and dead link Index: pep-0227.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0227.txt,v retrieving revision 1.10 retrieving revision 1.11 diff -u -d -r1.10 -r1.11 --- pep-0227.txt 12 Jun 2003 13:54:00 -0000 1.10 +++ pep-0227.txt 10 Oct 2005 17:37:48 -0000 1.11 @@ -118,9 +118,9 @@ contained within a class definition, the name bindings that occur in the class block are 
not visible to enclosed functions.) - A class definition is an executable statement that may uses and - definitions of names. These references follow the normal rules - for name resolution. The namespace of the class definition + A class definition is an executable statement that may contain + uses and definitions of names. These references follow the normal + rules for name resolution. The namespace of the class definition becomes the attribute dictionary of the class. The following operations are name binding operations. If they @@ -489,7 +489,7 @@ [1] Luca Cardelli. Compiling a functional language. In Proc. of the 1984 ACM Conference on Lisp and Functional Programming, pp. 208-217, Aug. 1984 - http://citeseer.nj.nec.com/cardelli84compiling.html + http://citeseer.ist.psu.edu/cardelli84compiling.html Copyright From lemburg at users.sourceforge.net Mon Oct 10 21:08:45 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Mon, 10 Oct 2005 21:08:45 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/api concrete.tex,1.68,1.69 Message-ID: <20051010190845.78DEA1E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/api In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19085/Doc/api Modified Files: concrete.tex Log Message: Clarify the docs for Py_UNICODE. Index: concrete.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/concrete.tex,v retrieving revision 1.68 retrieving revision 1.69 diff -u -d -r1.68 -r1.69 --- concrete.tex 6 Oct 2005 20:29:57 -0000 1.68 +++ concrete.tex 10 Oct 2005 19:08:41 -0000 1.69 @@ -787,14 +787,24 @@ implementation in Python: \begin{ctypedesc}{Py_UNICODE} - This type represents a 16-bit unsigned storage type which is used by - Python internally as basis for holding Unicode ordinals. 
On - platforms where \ctype{wchar_t} is available and also has 16-bits, - \ctype{Py_UNICODE} is a typedef alias for \ctype{wchar_t} to enhance - native platform compatibility. On all other platforms, - \ctype{Py_UNICODE} is a typedef alias for \ctype{unsigned short}. + This type represents the storage type which is used by Python + internally as basis for holding Unicode ordinals. Python's default + builds use a 16-bit type for \ctype{Py_UNICODE} and store Unicode + values internally as UCS2. It is also possible to build a UCS4 + version of Python (most recent Linux distributions come with UCS4 + builds of Python). These builds then use a 32-bit type for + \ctype{Py_UNICODE} and store Unicode data internally as UCS4. On + platforms where \ctype{wchar_t} is available and compatible with the + chosen Python Unicode build variant, \ctype{Py_UNICODE} is a typedef + alias for \ctype{wchar_t} to enhance native platform compatibility. + On all other platforms, \ctype{Py_UNICODE} is a typedef alias for + either \ctype{unsigned short} (UCS2) or \ctype{unsigned long} + (UCS4). \end{ctypedesc} +Note that UCS2 and UCS4 Python builds are not binary compatible. +Please keep this in mind when writing extensions or interfaces. + \begin{ctypedesc}{PyUnicodeObject} This subtype of \ctype{PyObject} represents a Python Unicode object. 
\end{ctypedesc} From montanaro at users.sourceforge.net Tue Oct 11 00:03:17 2005 From: montanaro at users.sourceforge.net (montanaro@users.sourceforge.net) Date: Tue, 11 Oct 2005 00:03:17 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/api concrete.tex, 1.58.2.4, 1.58.2.5 Message-ID: <20051010220317.2AA7B1E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/api In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4229 Modified Files: Tag: release24-maint concrete.tex Log Message: backport Py_UNICODE clarification Index: concrete.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/concrete.tex,v retrieving revision 1.58.2.4 retrieving revision 1.58.2.5 diff -u -d -r1.58.2.4 -r1.58.2.5 --- concrete.tex 28 Sep 2005 12:53:22 -0000 1.58.2.4 +++ concrete.tex 10 Oct 2005 22:03:12 -0000 1.58.2.5 @@ -787,14 +787,24 @@ implementation in Python: \begin{ctypedesc}{Py_UNICODE} - This type represents a 16-bit unsigned storage type which is used by - Python internally as basis for holding Unicode ordinals. On - platforms where \ctype{wchar_t} is available and also has 16-bits, - \ctype{Py_UNICODE} is a typedef alias for \ctype{wchar_t} to enhance - native platform compatibility. On all other platforms, - \ctype{Py_UNICODE} is a typedef alias for \ctype{unsigned short}. + This type represents the storage type which is used by Python + internally as basis for holding Unicode ordinals. Python's default + builds use a 16-bit type for \ctype{Py_UNICODE} and store Unicode + values internally as UCS2. It is also possible to build a UCS4 + version of Python (most recent Linux distributions come with UCS4 + builds of Python). These builds then use a 32-bit type for + \ctype{Py_UNICODE} and store Unicode data internally as UCS4. 
On + platforms where \ctype{wchar_t} is available and compatible with the + chosen Python Unicode build variant, \ctype{Py_UNICODE} is a typedef + alias for \ctype{wchar_t} to enhance native platform compatibility. + On all other platforms, \ctype{Py_UNICODE} is a typedef alias for + either \ctype{unsigned short} (UCS2) or \ctype{unsigned long} + (UCS4). \end{ctypedesc} +Note that UCS2 and UCS4 Python builds are not binary compatible. +Please keep this in mind when writing extensions or interfaces. + \begin{ctypedesc}{PyUnicodeObject} This subtype of \ctype{PyObject} represents a Python Unicode object. \end{ctypedesc} From nnorwitz at users.sourceforge.net Tue Oct 11 05:23:48 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 11 Oct 2005 05:23:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libnew.tex,1.10,1.11 Message-ID: <20051011032348.6ED421E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20449/lib Modified Files: libnew.tex Log Message: SF bug #1323294, Minor error in the Library Reference doc. Will backport Index: libnew.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libnew.tex,v retrieving revision 1.10 retrieving revision 1.11 diff -u -d -r1.10 -r1.11 --- libnew.tex 17 Aug 2004 02:31:55 -0000 1.10 +++ libnew.tex 11 Oct 2005 03:23:45 -0000 1.11 @@ -47,9 +47,10 @@ %XXX This is still undocumented!!!!!!!!!!! \end{funcdesc} -\begin{funcdesc}{module}{name} +\begin{funcdesc}{module}{name[, doc]} This function returns a new module object with name \var{name}. \var{name} must be a string. +The optional \var{doc} argument can have any type. 
\end{funcdesc} \begin{funcdesc}{classobj}{name, baseclasses, dict} From nnorwitz at users.sourceforge.net Tue Oct 11 05:24:32 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 11 Oct 2005 05:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libnew.tex, 1.10, 1.10.2.1 Message-ID: <20051011032432.1F4A71E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20583/Doc/lib Modified Files: Tag: release24-maint libnew.tex Log Message: Backport: SF bug #1323294, Minor error in the Library Reference doc. Index: libnew.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libnew.tex,v retrieving revision 1.10 retrieving revision 1.10.2.1 diff -u -d -r1.10 -r1.10.2.1 --- libnew.tex 17 Aug 2004 02:31:55 -0000 1.10 +++ libnew.tex 11 Oct 2005 03:24:28 -0000 1.10.2.1 @@ -47,9 +47,10 @@ %XXX This is still undocumented!!!!!!!!!!! \end{funcdesc} -\begin{funcdesc}{module}{name} +\begin{funcdesc}{module}{name[, doc]} This function returns a new module object with name \var{name}. \var{name} must be a string. +The optional \var{doc} argument can have any type. 
\end{funcdesc} \begin{funcdesc}{classobj}{name, baseclasses, dict} From vsajip at users.sourceforge.net Tue Oct 11 15:15:34 2005 From: vsajip at users.sourceforge.net (vsajip@users.sourceforge.net) Date: Tue, 11 Oct 2005 15:15:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging handlers.py, 1.21, 1.22 Message-ID: <20051011131534.A557A1E400B@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17965 Modified Files: handlers.py Log Message: Added Host and Content-type headers to requests sent by HTTPHandler (suggested by Steven Vereecken) Index: handlers.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/handlers.py,v retrieving revision 1.21 retrieving revision 1.22 diff -u -d -r1.21 -r1.22 --- handlers.py 13 Mar 2005 09:56:36 -0000 1.21 +++ handlers.py 11 Oct 2005 13:15:31 -0000 1.22 @@ -854,7 +854,8 @@ """ try: import httplib, urllib - h = httplib.HTTP(self.host) + host = self.host + h = httplib.HTTP(host) url = self.url data = urllib.urlencode(self.mapLogRecord(record)) if self.method == "GET": @@ -864,7 +865,15 @@ sep = '?' url = url + "%c%s" % (sep, data) h.putrequest(self.method, url) + # support multiple hosts on one IP address... 
+ # need to strip optional :port from host, if present + i = string.find(host, ":") + if i >= 0: + host = host[:i] + h.putheader("Host", host) if self.method == "POST": + h.putheader("Content-type", + "application/x-www-form-urlencoded") h.putheader("Content-length", str(len(data))) h.endheaders() if self.method == "POST": From vsajip at users.sourceforge.net Tue Oct 11 15:16:50 2005 From: vsajip at users.sourceforge.net (vsajip@users.sourceforge.net) Date: Tue, 11 Oct 2005 15:16:50 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging handlers.py, 1.19.2.1, 1.19.2.2 Message-ID: <20051011131650.0174D1E400C@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18281 Modified Files: Tag: release24-maint handlers.py Log Message: Added Host and Content-type headers to requests sent by HTTPHandler (suggested by Steven Vereecken) Index: handlers.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/handlers.py,v retrieving revision 1.19.2.1 retrieving revision 1.19.2.2 diff -u -d -r1.19.2.1 -r1.19.2.2 --- handlers.py 31 Mar 2005 20:12:55 -0000 1.19.2.1 +++ handlers.py 11 Oct 2005 13:16:46 -0000 1.19.2.2 @@ -854,7 +854,8 @@ """ try: import httplib, urllib - h = httplib.HTTP(self.host) + host = self.host + h = httplib.HTTP(host) url = self.url data = urllib.urlencode(self.mapLogRecord(record)) if self.method == "GET": @@ -864,7 +865,15 @@ sep = '?' url = url + "%c%s" % (sep, data) h.putrequest(self.method, url) + # support multiple hosts on one IP address... 
+ # need to strip optional :port from host, if present + i = string.find(host, ":") + if i >= 0: + host = host[:i] + h.putheader("Host", host) if self.method == "POST": + h.putheader("Content-type", + "application/x-www-form-urlencoded") h.putheader("Content-length", str(len(data))) h.endheaders() if self.method == "POST": From jhylton at users.sourceforge.net Tue Oct 11 21:18:14 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Tue, 11 Oct 2005 21:18:14 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include symtable.h, 2.9.18.12, 2.9.18.13 Message-ID: <20051011191814.F3C591E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20508/Include Modified Files: Tag: ast-branch symtable.h Log Message: Fix symbol table to catch several scoping syntax errors. It's illegal to mix import * or bare exec with nested scopes, because there is no unambiguous way to decide whether to treat variables as free variables or globals. The symbol table changed a bit to detect these errors. ste_optimized was renamed ste_unoptimized, to match its use: It contains a non-zero value when an unoptimized namespace (e.g. LOAD_NAME) will be used. Since the top-level uses LOAD_NAME, added OPT_TOPLEVEL along with the other OPT_ defines. Add an ste_free flag as a cheap way to tell if a block has free variables, including those inherited from children. Actually compute ste_child_free and ste_opt_lineno. Track rename of ste_optimized to ste_unoptimized in symbol table and compiler. Fixes test_scope. 
Index: symtable.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/symtable.h,v retrieving revision 2.9.18.12 retrieving revision 2.9.18.13 diff -u -d -r2.9.18.12 -r2.9.18.13 --- symtable.h 9 Jan 2005 18:49:43 -0000 2.9.18.12 +++ symtable.h 11 Oct 2005 19:18:11 -0000 2.9.18.13 @@ -30,8 +30,9 @@ PyObject *ste_varnames; /* list of variable names */ PyObject *ste_children; /* list of child ids */ block_ty ste_type; /* module, class, or function */ - int ste_optimized : 1; /* true if namespace can be optimized */ + int ste_unoptimized; /* false if namespace is optimized */ int ste_nested : 1; /* true if block is nested */ + int ste_free : 1; /* true if block has free variables */ int ste_child_free : 1; /* true if a child block has free variables, including free refs to globals */ int ste_generator : 1; /* true if namespace is a generator */ @@ -86,9 +87,11 @@ #define FREE 4 #define CELL 5 +/* The following three names are used for the ste_unoptimized bit field */ #define OPT_IMPORT_STAR 1 #define OPT_EXEC 2 #define OPT_BARE_EXEC 4 +#define OPT_TOPLEVEL 8 /* top-level names, including eval and exec */ #define GENERATOR 1 #define GENERATOR_EXPRESSION 2 From jhylton at users.sourceforge.net Tue Oct 11 21:18:15 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Tue, 11 Oct 2005 21:18:15 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python symtable.c, 2.10.8.36, 2.10.8.37 newcompile.c, 1.1.2.111, 1.1.2.112 Message-ID: <20051011191815.719551E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20508/Python Modified Files: Tag: ast-branch symtable.c newcompile.c Log Message: Fix symbol table to catch several scoping syntax errors. It's illegal to mix import * or bare exec with nested scopes, because there is no unambiguous way to decide whether to treat variables as free variables or globals. 
The symbol table changed a bit to detect these errors. ste_optimized was renamed ste_unoptimized, to match its use: It contains a non-zero value when an unoptimized namespace (e.g. LOAD_NAME) will be used. Since the top-level uses LOAD_NAME, added OPT_TOPLEVEL along with the other OPT_ defines. Add an ste_free flag as a cheap way to tell if a block has free variables, including those inherited from children. Actually compute ste_child_free and ste_opt_lineno. Track rename of ste_optimized to ste_unoptimized in symbol table and compiler. Fixes test_scope. Index: symtable.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/symtable.c,v retrieving revision 2.10.8.36 retrieving revision 2.10.8.37 diff -u -d -r2.10.8.36 -r2.10.8.37 --- symtable.c 31 Aug 2005 01:48:41 -0000 2.10.8.36 +++ symtable.c 11 Oct 2005 19:18:11 -0000 2.10.8.37 @@ -47,14 +47,15 @@ ste->ste_children = v; ste->ste_type = block; - ste->ste_optimized = block == FunctionBlock; + ste->ste_unoptimized = 0; + ste->ste_nested = 0; + ste->ste_free = 0; ste->ste_varargs = 0; ste->ste_varkeywords = 0; ste->ste_opt_lineno = 0; ste->ste_tmpname = 0; ste->ste_lineno = lineno; - ste->ste_nested = 0; if (st->st_cur != NULL && (st->st_cur->ste_nested || st->st_cur->ste_type == FunctionBlock)) @@ -214,6 +215,7 @@ symtable_enter_block(st, GET_IDENTIFIER(top), ModuleBlock, (void *)mod, 0); st->st_top = st->st_cur; + st->st_cur->ste_unoptimized = OPT_TOPLEVEL; /* Any other top-level initialization? */ switch (mod->kind) { case Module_kind: @@ -222,7 +224,7 @@ if (!symtable_visit_stmt(st, asdl_seq_GET(seq, i))) goto error; break; - case Expression_kind: + case Expression_kind: if (!symtable_visit_expr(st, mod->v.Expression.body)) goto error; break; @@ -335,11 +337,13 @@ /* Decide on scope of name, given flags. The dicts passed in as arguments are modified as necessary. + ste is passed so that flags can be updated. 
*/ static int -analyze_name(PyObject *dict, PyObject *name, int flags, PyObject *bound, - PyObject *local, PyObject *free, PyObject *global, int nested) +analyze_name(PySTEntryObject *ste, PyObject *dict, PyObject *name, int flags, + PyObject *bound, PyObject *local, PyObject *free, + PyObject *global) { if (flags & DEF_GLOBAL) { if (flags & DEF_PARAM) { @@ -374,6 +378,7 @@ */ if (bound && PyDict_GetItem(bound, name)) { SET_SCOPE(dict, name, FREE); + ste->ste_free = 1; if (PyDict_SetItem(free, name, Py_None) < 0) return 0; return 1; @@ -386,6 +391,8 @@ return 1; } else { + if (ste->ste_nested) + ste->ste_free = 1; SET_SCOPE(dict, name, GLOBAL_IMPLICIT); return 1; } @@ -433,7 +440,53 @@ return success; } -/* Enter the final scope information into the st_symbols dict. */ +/* Check for illegal statements in unoptimized namespaces */ +static int +check_unoptimized(const PySTEntryObject* ste) { + char buf[300]; + + if (ste->ste_type == ModuleBlock || !ste->ste_unoptimized + || !(ste->ste_free || ste->ste_child_free)) + return 1; + + const char* trailer = (ste->ste_child_free ? 
+ "contains a nested function with free variables" : + "is a nested function"); + + switch (ste->ste_unoptimized) { + case OPT_TOPLEVEL: /* exec / import * at top-level is fine */ + case OPT_EXEC: /* qualified exec is fine */ + return 1; + case OPT_IMPORT_STAR: + PyOS_snprintf(buf, sizeof(buf), + "import * is not allowed in function '%.100s' " + "because it is %s", + PyString_AS_STRING(ste->ste_name), trailer); + break; + case OPT_BARE_EXEC: + PyOS_snprintf(buf, sizeof(buf), + "unqualified exec is not allowed in function " + "'%.100s' it %s", + PyString_AS_STRING(ste->ste_name), trailer); + break; + default: + PyOS_snprintf(buf, sizeof(buf), + "function '%.100s' uses import * and bare exec, " + "which are illegal because it %s", + PyString_AS_STRING(ste->ste_name), trailer); + break; + } + + PyErr_SetString(PyExc_SyntaxError, buf); + PyErr_SyntaxLocation(ste->ste_table->st_filename, + ste->ste_opt_lineno); + return 0; +} + +/* Enter the final scope information into the st_symbols dict. + * + * All arguments are dicts. Modifies symbols, others are read-only. +*/ static int update_symbols(PyObject *symbols, PyObject *scope, PyObject *bound, PyObject *free, int class) @@ -501,11 +554,11 @@ /* Make final symbol table decisions for block of ste. 
Arguments: + ste -- current symtable entry (input/output) bound -- set of variables bound in enclosing scopes (input) free -- set of free variables in enclosed scopes (output) globals -- set of declared global variables in enclosing scopes (input) */ - static int analyze_block(PySTEntryObject *ste, PyObject *bound, PyObject *free, @@ -547,8 +600,8 @@ assert(PyDict_Check(ste->ste_symbols)); while (PyDict_Next(ste->ste_symbols, &pos, &name, &v)) { flags = PyInt_AS_LONG(v); - if (!analyze_name(scope, name, flags, bound, local, free, - global, ste->ste_nested)) + if (!analyze_name(ste, scope, name, flags, bound, local, free, + global)) goto error; } @@ -565,12 +618,15 @@ goto error; } + /* Recursively call analyze_block() on each child block */ for (i = 0; i < PyList_GET_SIZE(ste->ste_children); ++i) { PyObject *c = PyList_GET_ITEM(ste->ste_children, i); assert(c && PySTEntry_Check(c)); - if (!analyze_block((PySTEntryObject *)c, newbound, newfree, - newglobal)) + PySTEntryObject* entry = (PySTEntryObject*)c; + if (!analyze_block(entry, newbound, newfree, newglobal)) goto error; + if (entry->ste_free || entry->ste_child_free) + ste->ste_child_free = 1; } if (ste->ste_type == FunctionBlock && !analyze_cells(scope, newfree)) @@ -578,6 +634,8 @@ if (!update_symbols(ste->ste_symbols, scope, bound, newfree, ste->ste_type == ClassBlock)) goto error; + if (!check_unoptimized(ste)) + goto error; if (PyDict_Update(free, newfree) < 0) goto error; @@ -871,17 +929,29 @@ break; case Import_kind: VISIT_SEQ(st, alias, s->v.Import.names); + /* XXX Don't have the lineno available inside + visit_alias */ + if (st->st_cur->ste_unoptimized && !st->st_cur->ste_opt_lineno) + st->st_cur->ste_opt_lineno = s->lineno; break; case ImportFrom_kind: VISIT_SEQ(st, alias, s->v.ImportFrom.names); + /* XXX Don't have the lineno available inside + visit_alias */ + if (st->st_cur->ste_unoptimized && !st->st_cur->ste_opt_lineno) + st->st_cur->ste_opt_lineno = s->lineno; break; case Exec_kind: VISIT(st, 
expr, s->v.Exec.body); - st->st_cur->ste_optimized = 0; + if (!st->st_cur->ste_opt_lineno) + st->st_cur->ste_opt_lineno = s->lineno; if (s->v.Exec.globals) { + st->st_cur->ste_unoptimized |= OPT_EXEC; VISIT(st, expr, s->v.Exec.globals); if (s->v.Exec.locals) VISIT(st, expr, s->v.Exec.locals); + } else { + st->st_cur->ste_unoptimized |= OPT_BARE_EXEC; } break; case Global_kind: { @@ -1134,7 +1204,7 @@ "import * only allowed at module level")) return 0; } - st->st_cur->ste_optimized = 0; + st->st_cur->ste_unoptimized |= OPT_IMPORT_STAR; return 1; } } Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.111 retrieving revision 1.1.2.112 diff -u -d -r1.1.2.111 -r1.1.2.112 --- newcompile.c 7 Oct 2005 18:42:50 -0000 1.1.2.111 +++ newcompile.c 11 Oct 2005 19:18:11 -0000 1.1.2.112 @@ -2289,7 +2289,7 @@ optype = OP_FAST; break; case GLOBAL_IMPLICIT: - if (c->u->u_ste->ste_optimized) + if (!c->u->u_ste->ste_unoptimized) optype = OP_GLOBAL; break; case GLOBAL_EXPLICIT: @@ -3490,7 +3490,7 @@ if (ste->ste_type != ModuleBlock) flags |= CO_NEWLOCALS; if (ste->ste_type == FunctionBlock) { - if (ste->ste_optimized) + if (!ste->ste_unoptimized) flags |= CO_OPTIMIZED; if (ste->ste_nested) flags |= CO_NESTED; From fdrake at users.sourceforge.net Tue Oct 11 22:25:25 2005 From: fdrake at users.sourceforge.net (fdrake@users.sourceforge.net) Date: Tue, 11 Oct 2005 22:25:25 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ext run-func.c, 1.4.20.2, 1.4.20.3 Message-ID: <20051011202525.5AF6D1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ext In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5488 Modified Files: Tag: release24-maint run-func.c Log Message: fix stupid typo Index: run-func.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ext/run-func.c,v retrieving 
revision 1.4.20.2 retrieving revision 1.4.20.3 diff -u -d -r1.4.20.2 -r1.4.20.3 --- run-func.c 12 Jul 2005 13:20:56 -0000 1.4.20.2 +++ run-func.c 11 Oct 2005 20:25:19 -0000 1.4.20.3 @@ -20,7 +20,7 @@ Py_DECREF(pName); if (pModule != NULL) { - pFunc = PyDict_GetAttrString(pModule, argv[2]); + pFunc = PyDict_GetItemString(pModule, argv[2]); /* pFunc is a new reference */ if (pFunc && PyCallable_Check(pFunc)) { From fdrake at users.sourceforge.net Tue Oct 11 22:26:09 2005 From: fdrake at users.sourceforge.net (fdrake@users.sourceforge.net) Date: Tue, 11 Oct 2005 22:26:09 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ext run-func.c,1.6,1.7 Message-ID: <20051011202609.55DC81E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ext In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5726 Modified Files: run-func.c Log Message: fix stupid typo Index: run-func.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ext/run-func.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- run-func.c 12 Jul 2005 13:20:49 -0000 1.6 +++ run-func.c 11 Oct 2005 20:26:05 -0000 1.7 @@ -20,7 +20,7 @@ Py_DECREF(pName); if (pModule != NULL) { - pFunc = PyDict_GetAttrString(pModule, argv[2]); + pFunc = PyDict_GetItemString(pModule, argv[2]); /* pFunc is a new reference */ if (pFunc && PyCallable_Check(pFunc)) { From nascheme at users.sourceforge.net Tue Oct 11 23:37:32 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Tue, 11 Oct 2005 23:37:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.112, 1.1.2.113 Message-ID: <20051011213732.0CB421E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22615/Python Modified Files: Tag: ast-branch newcompile.c Log Message: Implement PyNode_Compile() for the AST compiler. 
Add some minimal tests for parser.compilest(). Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.112 retrieving revision 1.1.2.113 diff -u -d -r1.1.2.112 -r1.1.2.113 --- newcompile.c 11 Oct 2005 19:18:11 -0000 1.1.2.112 +++ newcompile.c 11 Oct 2005 21:37:28 -0000 1.1.2.113 @@ -474,6 +474,18 @@ return co; } +PyCodeObject * +PyNode_Compile(struct _node *n, const char *filename) +{ + PyCodeObject *co; + mod_ty mod = PyAST_FromNode(n, NULL, filename); + if (!mod) + return NULL; + co = PyAST_Compile(mod, filename, NULL); + free_mod(mod); + return co; +} + static void compiler_free(struct compiler *c) { From nascheme at users.sourceforge.net Tue Oct 11 23:37:32 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Tue, 11 Oct 2005 23:37:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_parser.py, 1.11.2.2, 1.11.2.3 Message-ID: <20051011213732.163C11E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22615/Lib/test Modified Files: Tag: ast-branch test_parser.py Log Message: Implement PyNode_Compile() for the AST compiler. Add some minimal tests for parser.compilest(). Index: test_parser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_parser.py,v retrieving revision 1.11.2.2 retrieving revision 1.11.2.3 diff -u -d -r1.11.2.2 -r1.11.2.3 --- test_parser.py 7 Jan 2005 06:59:10 -0000 1.11.2.2 +++ test_parser.py 11 Oct 2005 21:37:28 -0000 1.11.2.3 @@ -397,10 +397,33 @@ (0, '')) self.check_bad_tree(tree, "malformed global ast") + +class CompileTestCase(unittest.TestCase): + + # These tests are very minimal. 
:-( + + def test_compile_expr(self): + st = parser.expr('2 + 3') + code = parser.compilest(st) + self.assertEquals(eval(code), 5) + + def test_compile_suite(self): + st = parser.suite('x = 2; y = x + 3') + code = parser.compilest(st) + globs = {} + exec code in globs + self.assertEquals(globs['y'], 5) + + def test_compile_error(self): + st = parser.suite('1 = 3 + 4') + self.assertRaises(SyntaxError, parser.compilest, st) + + def test_main(): test_support.run_unittest( RoundtripLegalSyntaxTestCase, - IllegalSyntaxTestCase + IllegalSyntaxTestCase, + CompileTestCase, ) From jhylton at users.sourceforge.net Wed Oct 12 00:03:16 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Wed, 12 Oct 2005 00:03:16 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include pythonrun.h, 2.49.2.7, 2.49.2.8 Message-ID: <20051011220316.547071E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29290/Include Modified Files: Tag: ast-branch pythonrun.h Log Message: Fix typos in PyParser_SimpleParseString and File. The #defines were wrong. Fixing them required twiddling the actual definitions, too. 
Index: pythonrun.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pythonrun.h,v retrieving revision 2.49.2.7 retrieving revision 2.49.2.8 diff -u -d -r2.49.2.7 -r2.49.2.8 --- pythonrun.h 7 Jan 2005 06:57:42 -0000 2.49.2.7 +++ pythonrun.h 11 Oct 2005 22:03:13 -0000 2.49.2.8 @@ -41,10 +41,10 @@ PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, int, char *, char *, PyCompilerFlags *, int *); -#define PyParser_SimpleParserString(S, B) \ - PyParser_SimplerParserStringFlags(S, B, 0) -#define PyParser_SimpleParserFile(FP, S, B) \ - PyParser_SimplerParserFileFlags(FP, S, B, 0) +#define PyParser_SimpleParseString(S, B) \ + PyParser_SimpleParseStringFlags(S, B, 0) +#define PyParser_SimpleParseFile(FP, S, B) \ + PyParser_SimpleParseFileFlags(FP, S, B, 0) PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlags(const char *, int, int); PyAPI_FUNC(struct _node *) PyParser_SimpleParseFileFlags(FILE *, const char *, From jhylton at users.sourceforge.net Wed Oct 12 00:03:16 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Wed, 12 Oct 2005 00:03:16 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python pythonrun.c, 2.161.2.16, 2.161.2.17 Message-ID: <20051011220316.739D51E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29290/Python Modified Files: Tag: ast-branch pythonrun.c Log Message: Fix typos in PyParser_SimpleParseString and File. The #defines were wrong. Fixing them required twiddling the actual definitions, too. 
Index: pythonrun.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v retrieving revision 2.161.2.16 retrieving revision 2.161.2.17 diff -u -d -r2.161.2.16 -r2.161.2.17 --- pythonrun.c 7 Jan 2005 17:25:18 -0000 2.161.2.16 +++ pythonrun.c 11 Oct 2005 22:03:13 -0000 2.161.2.17 @@ -1312,12 +1312,6 @@ return n; } -node * -PyParser_SimpleParseFile(FILE *fp, const char *filename, int start) -{ - return PyParser_SimpleParseFileFlags(fp, filename, start, 0); -} - /* Simplified interface to parsestring -- return node or set exception */ node * @@ -1333,12 +1327,6 @@ } node * -PyParser_SimpleParseString(const char *str, int start) -{ - return PyParser_SimpleParseStringFlags(str, start, 0); -} - -node * PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename, int start, int flags) { @@ -1642,3 +1630,21 @@ return oldhandler; #endif } + +/* Deprecated C API functions still provided for binary compatiblity */ + +#undef PyParser_SimpleParseFile +#undef PyParser_SimpleParseString + +node * +PyParser_SimpleParseFile(FILE *fp, const char *filename, int start) +{ + return PyParser_SimpleParseFileFlags(fp, filename, start, 0); +} + +node * +PyParser_SimpleParseString(const char *str, int start) +{ + return PyParser_SimpleParseStringFlags(str, start, 0); +} + From jhylton at users.sourceforge.net Wed Oct 12 00:29:09 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Wed, 12 Oct 2005 00:29:09 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.113, 1.1.2.114 Message-ID: <20051011222909.7EA541E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5663/Python Modified Files: Tag: ast-branch newcompile.c Log Message: Get rid of compiler warning for PyAST_FromNode(). 
Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.113 retrieving revision 1.1.2.114 diff -u -d -r1.1.2.113 -r1.1.2.114 --- newcompile.c 11 Oct 2005 21:37:28 -0000 1.1.2.113 +++ newcompile.c 11 Oct 2005 22:29:06 -0000 1.1.2.114 @@ -17,6 +17,8 @@ #include "Python.h" #include "Python-ast.h" +#include "node.h" +#include "ast.h" #include "code.h" #include "compile.h" #include "symtable.h" From nascheme at users.sourceforge.net Wed Oct 12 00:50:49 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Wed, 12 Oct 2005 00:50:49 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_peepholer.py, 1.6.2.1, 1.6.2.2 Message-ID: <20051011225049.312A81E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9954/Lib/test Modified Files: Tag: ast-branch test_peepholer.py Log Message: Merge changes from HEAD into test_peepholer.py. 
Index: test_peepholer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_peepholer.py,v retrieving revision 1.6.2.1 retrieving revision 1.6.2.2 diff -u -d -r1.6.2.1 -r1.6.2.2 --- test_peepholer.py 7 Jan 2005 06:59:10 -0000 1.6.2.1 +++ test_peepholer.py 11 Oct 2005 22:50:45 -0000 1.6.2.2 @@ -109,7 +109,6 @@ ('a="abc" + "def"', "('abcdef')"), # check string ops ('a = 3**4', '(81)'), # binary power ('a = 3*4', '(12)'), # binary multiply - ('a = 13/4.0', '(3.25)'), # binary divide ('a = 13//4', '(3)'), # binary floor divide ('a = 14%4', '(2)'), # binary modulo ('a = 2+3', '(5)'), # binary add @@ -130,6 +129,29 @@ self.assert_('(2)' in asm) self.assert_("('b')" in asm) + # Verify that large sequences do not result from folding + asm = dis_single('a="x"*1000') + self.assert_('(1000)' in asm) + + def test_folding_of_unaryops_on_constants(self): + for line, elem in ( + ('`1`', "('1')"), # unary convert + ('-0.5', '(-0.5)'), # unary negative + ('~-2', '(1)'), # unary invert + ): + asm = dis_single(line) + self.assert_(elem in asm, asm) + self.assert_('UNARY_' not in asm) + + # Verify that unfoldables are skipped + for line, elem in ( + ('-"abc"', "('abc')"), # unary negative + ('~"abc"', "('abc')"), # unary invert + ): + asm = dis_single(line) + self.assert_(elem in asm, asm) + self.assert_('UNARY_' in asm) + def test_elim_extra_return(self): # RETURN LOAD_CONST None RETURN --> RETURN def f(x): From nascheme at users.sourceforge.net Wed Oct 12 00:54:08 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Wed, 12 Oct 2005 00:54:08 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.114, 1.1.2.115 Message-ID: <20051011225408.3A67E1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10415/Python Modified Files: Tag: ast-branch newcompile.c Log Message: Integrate 
peephole optimizer from HEAD version of compiler.c. Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.114 retrieving revision 1.1.2.115 diff -u -d -r1.1.2.114 -r1.1.2.115 --- newcompile.c 11 Oct 2005 22:29:06 -0000 1.1.2.114 +++ newcompile.c 11 Oct 2005 22:54:03 -0000 1.1.2.115 @@ -568,6 +568,598 @@ return dest; } +/* Begin: Peephole optimizations ----------------------------------------- */ + +#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) +#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) +#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 +#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) +#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1]) + +/* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n + with LOAD_CONST (c1, c2, ... cn). + The consts table must still be in list form so that the + new constant (c1, c2, ... cn) can be appended. + Called with codestr pointing to the first LOAD_CONST. + Bails out with no change if one or more of the LOAD_CONSTs is missing. + Also works for BUILD_LIST when followed by an "in" or "not in" test. 
+*/ +static int +tuple_of_constants(unsigned char *codestr, int n, PyObject *consts) +{ + PyObject *newconst, *constant; + int i, arg, len_consts; + + /* Pre-conditions */ + assert(PyList_CheckExact(consts)); + assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST); + assert(GETARG(codestr, (n*3)) == n); + for (i=0 ; i 20) { + Py_DECREF(newconst); + return 0; + } + + /* Append folded constant into consts table */ + len_consts = PyList_GET_SIZE(consts); + if (PyList_Append(consts, newconst)) { + Py_DECREF(newconst); + return 0; + } + Py_DECREF(newconst); + + /* Write NOP NOP NOP NOP LOAD_CONST newconst */ + memset(codestr, NOP, 4); + codestr[4] = LOAD_CONST; + SETARG(codestr, 4, len_consts); + return 1; +} + +static int +fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts) +{ + PyObject *newconst=NULL, *v; + int len_consts, opcode; + + /* Pre-conditions */ + assert(PyList_CheckExact(consts)); + assert(codestr[0] == LOAD_CONST); + + /* Create new constant */ + v = PyList_GET_ITEM(consts, GETARG(codestr, 0)); + opcode = codestr[3]; + switch (opcode) { + case UNARY_NEGATIVE: + /* Preserve the sign of -0.0 */ + if (PyObject_IsTrue(v) == 1) + newconst = PyNumber_Negative(v); + break; + case UNARY_CONVERT: + newconst = PyObject_Repr(v); + break; + case UNARY_INVERT: + newconst = PyNumber_Invert(v); + break; + default: + /* Called with an unknown opcode */ + assert(0); + return 0; + } + if (newconst == NULL) { + PyErr_Clear(); + return 0; + } + + /* Append folded constant into consts table */ + len_consts = PyList_GET_SIZE(consts); + if (PyList_Append(consts, newconst)) { + Py_DECREF(newconst); + return 0; + } + Py_DECREF(newconst); + + /* Write NOP LOAD_CONST newconst */ + codestr[0] = NOP; + codestr[1] = LOAD_CONST; + SETARG(codestr, 1, len_consts); + return 1; +} + +static unsigned int * +markblocks(unsigned char *code, int len) +{ + unsigned int *blocks = PyMem_Malloc(len*sizeof(int)); + int i,j, opcode, blockcnt = 0; + + if (blocks == NULL) 
+ return NULL; + memset(blocks, 0, len*sizeof(int)); + + /* Mark labels in the first pass */ + for (i=0 ; i= 255. + + Optimizations are restricted to simple transformations occuring within a + single basic block. All transformations keep the code size the same or + smaller. For those that reduce size, the gaps are initially filled with + NOPs. Later those NOPs are removed and the jump addresses retargeted in + a single pass. Line numbering is adjusted accordingly. */ + +static PyObject * +optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *lineno_obj) +{ + int i, j, codelen, nops, h, adj; + int tgt, tgttgt, opcode; + unsigned char *codestr = NULL; + unsigned char *lineno; + int *addrmap = NULL; + int new_line, cum_orig_line, last_line, tabsiz; + int cumlc=0, lastlc=0; /* Count runs of consecutive LOAD_CONST codes */ + unsigned int *blocks = NULL; + char *name; + + /* Bail out if an exception is set */ + if (PyErr_Occurred()) + goto exitUnchanged; + + /* Bypass optimization when the lineno table is too complex */ + assert(PyString_Check(lineno_obj)); + lineno = (unsigned char*)PyString_AS_STRING(lineno_obj); + tabsiz = PyString_GET_SIZE(lineno_obj); + if (memchr(lineno, 255, tabsiz) != NULL) + goto exitUnchanged; + + /* Avoid situations where jump retargeting could overflow */ + assert(PyString_Check(code)); + codelen = PyString_Size(code); + if (codelen > 32700) + goto exitUnchanged; + + /* Make a modifiable copy of the code string */ + codestr = PyMem_Malloc(codelen); + if (codestr == NULL) + goto exitUnchanged; + codestr = memcpy(codestr, PyString_AS_STRING(code), codelen); + + /* Verify that RETURN_VALUE terminates the codestring. This allows + the various transformation patterns to look ahead several + instructions without additional checks to make sure they are not + looking beyond the end of the code string. 
+ */ + if (codestr[codelen-1] != RETURN_VALUE) + goto exitUnchanged; + + /* Mapping to new jump targets after NOPs are removed */ + addrmap = PyMem_Malloc(codelen * sizeof(int)); + if (addrmap == NULL) + goto exitUnchanged; + + blocks = markblocks(codestr, codelen); + if (blocks == NULL) + goto exitUnchanged; + assert(PyList_Check(consts)); + + for (i=0 ; i a is not b + not a in b --> a not in b + not a is not b --> a is b + not a not in b --> a in b + */ + case COMPARE_OP: + j = GETARG(codestr, i); + if (j < 6 || j > 9 || + codestr[i+3] != UNARY_NOT || + !ISBASICBLOCK(blocks,i,4)) + continue; + SETARG(codestr, i, (j^1)); + codestr[i+3] = NOP; + break; + + /* Replace LOAD_GLOBAL/LOAD_NAME None with LOAD_CONST None */ + case LOAD_NAME: + case LOAD_GLOBAL: + j = GETARG(codestr, i); + name = PyString_AsString(PyTuple_GET_ITEM(names, j)); + if (name == NULL || strcmp(name, "None") != 0) + continue; + for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) { + if (PyList_GET_ITEM(consts, j) == Py_None) { + codestr[i] = LOAD_CONST; + SETARG(codestr, i, j); + cumlc = lastlc + 1; + break; + } + } + break; + + /* Skip over LOAD_CONST trueconst JUMP_IF_FALSE xx POP_TOP */ + case LOAD_CONST: + cumlc = lastlc + 1; + j = GETARG(codestr, i); + if (codestr[i+3] != JUMP_IF_FALSE || + codestr[i+6] != POP_TOP || + !ISBASICBLOCK(blocks,i,7) || + !PyObject_IsTrue(PyList_GET_ITEM(consts, j))) + continue; + memset(codestr+i, NOP, 7); + cumlc = 0; + break; + + /* Try to fold tuples of constants (includes a case for lists + which are only used for "in" and "not in" tests). + Skip over BUILD_SEQN 1 UNPACK_SEQN 1. + Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2. + Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. 
*/ + case BUILD_TUPLE: + case BUILD_LIST: + j = GETARG(codestr, i); + h = i - 3 * j; + if (h >= 0 && + j <= lastlc && + ((opcode == BUILD_TUPLE && + ISBASICBLOCK(blocks, h, 3*(j+1))) || + (opcode == BUILD_LIST && + codestr[i+3]==COMPARE_OP && + ISBASICBLOCK(blocks, h, 3*(j+2)) && + (GETARG(codestr,i+3)==6 || + GETARG(codestr,i+3)==7))) && + tuple_of_constants(&codestr[h], j, consts)) { + assert(codestr[i] == LOAD_CONST); + cumlc = 1; + break; + } + if (codestr[i+3] != UNPACK_SEQUENCE || + !ISBASICBLOCK(blocks,i,6) || + j != GETARG(codestr, i+3)) + continue; + if (j == 1) { + memset(codestr+i, NOP, 6); + } else if (j == 2) { + codestr[i] = ROT_TWO; + memset(codestr+i+1, NOP, 5); + } else if (j == 3) { + codestr[i] = ROT_THREE; + codestr[i+1] = ROT_TWO; + memset(codestr+i+2, NOP, 4); + } + break; + + /* Fold binary ops on constants. + LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */ + case BINARY_POWER: + case BINARY_MULTIPLY: + case BINARY_TRUE_DIVIDE: + case BINARY_FLOOR_DIVIDE: + case BINARY_MODULO: + case BINARY_ADD: + case BINARY_SUBTRACT: + case BINARY_SUBSCR: + case BINARY_LSHIFT: + case BINARY_RSHIFT: + case BINARY_AND: + case BINARY_XOR: + case BINARY_OR: + if (lastlc >= 2 && + ISBASICBLOCK(blocks, i-6, 7) && + fold_binops_on_constants(&codestr[i-6], consts)) { + i -= 2; + assert(codestr[i] == LOAD_CONST); + cumlc = 1; + } + break; + + /* Fold unary ops on constants. + LOAD_CONST c1 UNARY_OP --> LOAD_CONST unary_op(c) */ + case UNARY_NEGATIVE: + case UNARY_CONVERT: + case UNARY_INVERT: + if (lastlc >= 1 && + ISBASICBLOCK(blocks, i-3, 4) && + fold_unaryops_on_constants(&codestr[i-3], consts)) { + i -= 2; + assert(codestr[i] == LOAD_CONST); + cumlc = 1; + } + break; + + /* Simplify conditional jump to conditional jump where the + result of the first test implies the success of a similar + test or the failure of the opposite test. 
+ Arises in code like: + "if a and b:" + "if a or b:" + "a and b or c" + "(a and b) and c" + x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z + x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3 + where y+3 is the instruction following the second test. + */ + case JUMP_IF_FALSE: + case JUMP_IF_TRUE: + tgt = GETJUMPTGT(codestr, i); + j = codestr[tgt]; + if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) { + if (j == opcode) { + tgttgt = GETJUMPTGT(codestr, tgt) - i - 3; + SETARG(codestr, i, tgttgt); + } else { + tgt -= i; + SETARG(codestr, i, tgt); + } + break; + } + /* Intentional fallthrough */ + + /* Replace jumps to unconditional jumps */ + case FOR_ITER: + case JUMP_FORWARD: + case JUMP_ABSOLUTE: + case CONTINUE_LOOP: + case SETUP_LOOP: + case SETUP_EXCEPT: + case SETUP_FINALLY: + tgt = GETJUMPTGT(codestr, i); + if (!UNCONDITIONAL_JUMP(codestr[tgt])) + continue; + tgttgt = GETJUMPTGT(codestr, tgt); + if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */ + opcode = JUMP_ABSOLUTE; + if (!ABSOLUTE_JUMP(opcode)) + tgttgt -= i + 3; /* Calc relative jump addr */ + if (tgttgt < 0) /* No backward relative jumps */ + continue; + codestr[i] = opcode; + SETARG(codestr, i, tgttgt); + break; + + case EXTENDED_ARG: + goto exitUnchanged; + + /* Replace RETURN LOAD_CONST None RETURN with just RETURN */ + case RETURN_VALUE: + if (i+4 >= codelen || + codestr[i+4] != RETURN_VALUE || + !ISBASICBLOCK(blocks,i,5)) + continue; + memset(codestr+i+1, NOP, 4); + break; + } + } + + /* Fixup linenotab */ + for (i=0, nops=0 ; iu->u_consts, 0); + tmp = dict_keys_inorder(c->u->u_consts, 0); + if (!tmp) + goto error; + consts = PySequence_List(tmp); /* optimize_code requires a list */ + Py_DECREF(tmp); + names = dict_keys_inorder(c->u->u_names, 0); varnames = dict_keys_inorder(c->u->u_varnames, 0); if (!consts || !names || !varnames) @@ -3566,9 +4165,20 @@ nlocals = PyDict_Size(c->u->u_varnames); flags = compute_code_flags(c); if (flags < 0) - goto error; + goto error; + + 
bytecode = optimize_code(a->a_bytecode, consts, names, a->a_lnotab); + if (!bytecode) + goto error; + + tmp = PyList_AsTuple(consts); /* PyCode_New requires a tuple */ + if (!tmp) + goto error; + Py_DECREF(consts); + consts = tmp; + co = PyCode_New(c->u->u_argcount, nlocals, stackdepth(c), flags, - a->a_bytecode, consts, names, varnames, + bytecode, consts, names, varnames, freevars, cellvars, filename, c->u->u_name, c->u->u_firstlineno, @@ -3581,6 +4191,7 @@ Py_XDECREF(name); Py_XDECREF(freevars); Py_XDECREF(cellvars); + Py_XDECREF(bytecode); return co; } From jhylton at users.sourceforge.net Wed Oct 12 05:18:11 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Wed, 12 Oct 2005 05:18:11 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include Python-ast.h, 1.1.2.12, 1.1.2.13 Message-ID: <20051012031811.20AEB1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12337/Include Modified Files: Tag: ast-branch Python-ast.h Log Message: Add line numbers to expressions. The line numbers aren't used yet, but should make it possible to generate correct line numbers (e.g. co_lnotab, co_firstlineno). Several tests fails because of the incorrect line numbers currently generated. 
Index: Python-ast.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/Attic/Python-ast.h,v retrieving revision 1.1.2.12 retrieving revision 1.1.2.13 diff -u -d -r1.1.2.12 -r1.1.2.13 --- Python-ast.h 13 Apr 2005 19:44:37 -0000 1.1.2.12 +++ Python-ast.h 12 Oct 2005 03:18:07 -0000 1.1.2.13 @@ -1,4 +1,4 @@ -/* File automatically generated by ./Parser/asdl_c.py */ +/* File automatically generated by ../Parser/asdl_c.py */ #include "asdl.h" @@ -273,6 +273,7 @@ } Tuple; } v; + int lineno; }; struct _slice { @@ -356,24 +357,27 @@ stmt_ty Pass(int lineno); stmt_ty Break(int lineno); stmt_ty Continue(int lineno); -expr_ty BoolOp(boolop_ty op, asdl_seq * values); -expr_ty BinOp(expr_ty left, operator_ty op, expr_ty right); -expr_ty UnaryOp(unaryop_ty op, expr_ty operand); -expr_ty Lambda(arguments_ty args, expr_ty body); -expr_ty Dict(asdl_seq * keys, asdl_seq * values); -expr_ty ListComp(expr_ty elt, asdl_seq * generators); -expr_ty GeneratorExp(expr_ty elt, asdl_seq * generators); -expr_ty Compare(expr_ty left, asdl_seq * ops, asdl_seq * comparators); +expr_ty BoolOp(boolop_ty op, asdl_seq * values, int lineno); +expr_ty BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno); +expr_ty UnaryOp(unaryop_ty op, expr_ty operand, int lineno); +expr_ty Lambda(arguments_ty args, expr_ty body, int lineno); +expr_ty Dict(asdl_seq * keys, asdl_seq * values, int lineno); +expr_ty ListComp(expr_ty elt, asdl_seq * generators, int lineno); +expr_ty GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno); +expr_ty Compare(expr_ty left, asdl_seq * ops, asdl_seq * comparators, int + lineno); expr_ty Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty - starargs, expr_ty kwargs); -expr_ty Repr(expr_ty value); -expr_ty Num(object n); -expr_ty Str(string s); -expr_ty Attribute(expr_ty value, identifier attr, expr_context_ty ctx); -expr_ty Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx); 
-expr_ty Name(identifier id, expr_context_ty ctx); -expr_ty List(asdl_seq * elts, expr_context_ty ctx); -expr_ty Tuple(asdl_seq * elts, expr_context_ty ctx); + starargs, expr_ty kwargs, int lineno); +expr_ty Repr(expr_ty value, int lineno); +expr_ty Num(object n, int lineno); +expr_ty Str(string s, int lineno); +expr_ty Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int + lineno); +expr_ty Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx, int + lineno); +expr_ty Name(identifier id, expr_context_ty ctx, int lineno); +expr_ty List(asdl_seq * elts, expr_context_ty ctx, int lineno); +expr_ty Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno); slice_ty Ellipsis(void); slice_ty Slice(expr_ty lower, expr_ty upper, expr_ty step); slice_ty ExtSlice(asdl_seq * dims); From jhylton at users.sourceforge.net Wed Oct 12 05:18:11 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Wed, 12 Oct 2005 05:18:11 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Parser Python.asdl, 1.1.2.11, 1.1.2.12 Message-ID: <20051012031811.7BFD31E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Parser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12337/Parser Modified Files: Tag: ast-branch Python.asdl Log Message: Add line numbers to expressions. The line numbers aren't used yet, but should make it possible to generate correct line numbers (e.g. co_lnotab, co_firstlineno). Several tests fails because of the incorrect line numbers currently generated. 
Index: Python.asdl =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/Attic/Python.asdl,v retrieving revision 1.1.2.11 retrieving revision 1.1.2.12 diff -u -d -r1.1.2.11 -r1.1.2.12 --- Python.asdl 13 Apr 2005 19:59:20 -0000 1.1.2.11 +++ Python.asdl 12 Oct 2005 03:18:07 -0000 1.1.2.12 @@ -72,6 +72,8 @@ | List(expr* elts, expr_context ctx) | Tuple(expr *elts, expr_context ctx) + attributes (int lineno) + expr_context = Load | Store | Del | AugLoad | AugStore | Param slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step) From jhylton at users.sourceforge.net Wed Oct 12 05:18:11 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Wed, 12 Oct 2005 05:18:11 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c, 1.1.2.65, 1.1.2.66 newcompile.c, 1.1.2.115, 1.1.2.116 Message-ID: <20051012031811.9DEF01E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12337/Python Modified Files: Tag: ast-branch ast.c newcompile.c Log Message: Add line numbers to expressions. The line numbers aren't used yet, but should make it possible to generate correct line numbers (e.g. co_lnotab, co_firstlineno). Several tests fails because of the incorrect line numbers currently generated. Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/ast.c,v retrieving revision 1.1.2.65 retrieving revision 1.1.2.66 diff -u -d -r1.1.2.65 -r1.1.2.66 --- ast.c 29 Sep 2005 19:27:58 -0000 1.1.2.65 +++ ast.c 12 Oct 2005 03:18:07 -0000 1.1.2.66 @@ -338,7 +338,8 @@ /* Set the context ctx for expr_ty e returning 0 on success, -1 on error. Only sets context for expr kinds that "can appear in assignment context" - (according to ../Parser/Python.asdl) + (according to ../Parser/Python.asdl). 
For other expr kinds, it sets + an appropriate syntax error and returns false. If e is a sequential type, items in sequence will also have their context set. @@ -373,6 +374,8 @@ s = e->v.List.elts; break; case Tuple_kind: + if (asdl_seq_LEN(e->v.Tuple.elts) == 0) + return ast_error(n, "can't assign to ()"); e->v.Tuple.ctx = ctx; s = e->v.Tuple.elts; break; @@ -384,6 +387,11 @@ else return ast_error(n, "unexpected operation on function call"); break; + case BinOp_kind: + return ast_error(n, "can't assign to operator"); + case GeneratorExp_kind: + return ast_error(n, "assignment to generator expression " + "not possible"); default: return ast_error(n, "unexpected node in assignment"); break; @@ -533,7 +541,7 @@ ast_error(child, "assignment to None"); return NULL; } - arg = Name(NEW_IDENTIFIER(child), Store); + arg = Name(NEW_IDENTIFIER(child), Store, LINENO(child)); } else arg = compiler_complex_args(CHILD(CHILD(n, 2*i), 1)); @@ -541,7 +549,7 @@ asdl_seq_SET(args, i, arg); } - result = Tuple(args, Store); + result = Tuple(args, Store, LINENO(n)); set_context(result, Store, n); return result; } @@ -620,8 +628,9 @@ goto error; } /* XXX check return value of Name call */ - asdl_seq_APPEND(args, Name(NEW_IDENTIFIER(CHILD(ch, 0)), - Param)); + asdl_seq_APPEND(args, + Name(NEW_IDENTIFIER(CHILD(ch, 0)), + Param, LINENO(ch))); } i += 2; /* the name and the comma */ break; @@ -672,7 +681,7 @@ id = NEW_IDENTIFIER(CHILD(n, 0)); if (!id) goto error; - e = Name(id, Load); + e = Name(id, Load, LINENO(n)); if (!e) goto error; id = NULL; @@ -681,7 +690,7 @@ id = NEW_IDENTIFIER(CHILD(n, i)); if (!id) goto error; - attrib = Attribute(e, id, Load); + attrib = Attribute(e, id, Load, LINENO(CHILD(n, i))); if (!attrib) goto error; e = attrib; @@ -720,7 +729,7 @@ name_expr = NULL; } else if (NCH(n) == 5) { /* Call with no arguments */ - d = Call(name_expr, NULL, NULL, NULL, NULL); + d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n)); if (!d) goto error; name_expr = NULL; @@ -827,8 
+836,6 @@ expression = ast_for_expr(c, CHILD(n, 2)); if (!expression) return NULL; - - return Lambda(args, expression); } else { args = ast_for_arguments(c, CHILD(n, 1)); @@ -837,9 +844,9 @@ expression = ast_for_expr(c, CHILD(n, 3)); if (!expression) return NULL; - - return Lambda(args, expression); } + + return Lambda(args, expression, LINENO(n)); } /* Count the number of 'for' loop in a list comprehension. @@ -958,7 +965,7 @@ if (asdl_seq_LEN(t) == 1) lc = comprehension(asdl_seq_GET(t, 0), expression, NULL); else - lc = comprehension(Tuple(t, Store), expression, NULL); + lc = comprehension(Tuple(t, Store, LINENO(ch)), expression, NULL); if (!lc) { asdl_seq_free(listcomps); @@ -1003,7 +1010,7 @@ asdl_seq_APPEND(listcomps, lc); } - return ListComp(elt, listcomps); + return ListComp(elt, listcomps, LINENO(n)); } /* @@ -1117,9 +1124,11 @@ } if (asdl_seq_LEN(t) == 1) - ge = comprehension(asdl_seq_GET(t, 0), expression, NULL); + ge = comprehension(asdl_seq_GET(t, 0), expression, + NULL); else - ge = comprehension(Tuple(t, Store), expression, NULL); + ge = comprehension(Tuple(t, Store, LINENO(ch)), + expression, NULL); if (!ge) { asdl_seq_free(genexps); @@ -1148,11 +1157,11 @@ for (j = 0; j < n_ifs; j++) { REQ(ch, gen_iter); - ch = CHILD(ch, 0); REQ(ch, gen_if); - asdl_seq_APPEND(ifs, ast_for_expr(c, CHILD(ch, 1))); + asdl_seq_APPEND(ifs, + ast_for_expr(c, CHILD(ch, 1))); if (NCH(ch) == 3) ch = CHILD(ch, 2); } @@ -1164,7 +1173,7 @@ asdl_seq_APPEND(genexps, ge); } - return GeneratorExp(elt, genexps); + return GeneratorExp(elt, genexps, LINENO(n)); } static expr_ty @@ -1179,14 +1188,14 @@ case NAME: /* All names start in Load context, but may later be changed. 
*/ - return Name(NEW_IDENTIFIER(ch), Load); + return Name(NEW_IDENTIFIER(ch), Load, LINENO(n)); case STRING: { PyObject *str = parsestrplus(c, n); if (!str) return NULL; - return Str(str); + return Str(str, LINENO(n)); } case NUMBER: { PyObject *pynum = parsenumber(STR(ch)); @@ -1194,13 +1203,13 @@ if (!pynum) return NULL; - return Num(pynum); + return Num(pynum, LINENO(n)); } case LPAR: /* some parenthesized expressions */ ch = CHILD(n, 1); if (TYPE(ch) == RPAR) - return Tuple(NULL, Load); + return Tuple(NULL, Load, LINENO(n)); if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == gen_for)) return ast_for_genexp(c, ch); @@ -1210,7 +1219,7 @@ ch = CHILD(n, 1); if (TYPE(ch) == RSQB) - return List(NULL, Load); + return List(NULL, Load, LINENO(n)); REQ(ch, listmaker); if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { @@ -1219,7 +1228,7 @@ if (!elts) return NULL; - return List(elts, Load); + return List(elts, Load, LINENO(n)); } else return ast_for_listcomp(c, ch); @@ -1255,7 +1264,7 @@ asdl_seq_SET(values, i / 4, expression); } - return Dict(keys, values); + return Dict(keys, values, LINENO(n)); } case BACKQUOTE: { /* repr */ expr_ty expression = ast_for_testlist(c, CHILD(n, 1)); @@ -1263,7 +1272,7 @@ if (!expression) return NULL; - return Repr(expression); + return Repr(expression, LINENO(n)); } default: PyErr_Format(PyExc_Exception, "unhandled atom %d", @@ -1367,15 +1376,16 @@ if (!operator) return NULL; - result = BinOp(expr1, operator, expr2); + result = BinOp(expr1, operator, expr2, LINENO(n)); if (!result) return NULL; nops = (NCH(n) - 1) / 2; for (i = 1; i < nops; i++) { expr_ty tmp_result, tmp; + const node* next_oper = CHILD(n, i * 2 + 1); - operator = get_operator(CHILD(n, i * 2 + 1)); + operator = get_operator(next_oper); if (!operator) return NULL; @@ -1383,7 +1393,8 @@ if (!tmp) return NULL; - tmp_result = BinOp(result, operator, tmp); + tmp_result = BinOp(result, operator, tmp, + LINENO(next_oper)); if (!tmp) return NULL; result = tmp_result; @@ -1436,10 +1447,10 
@@ asdl_seq_SET(seq, i / 2, e); } if (!strcmp(STR(CHILD(n, 1)), "and")) - return BoolOp(And, seq); + return BoolOp(And, seq, LINENO(n)); else { assert(!strcmp(STR(CHILD(n, 1)), "or")); - return BoolOp(Or, seq); + return BoolOp(Or, seq, LINENO(n)); } break; case not_test: @@ -1452,7 +1463,7 @@ if (!expression) return NULL; - return UnaryOp(Not, expression); + return UnaryOp(Not, expression, LINENO(n)); } case comparison: if (NCH(n) == 1) { @@ -1489,7 +1500,7 @@ if (!expression) return NULL; - return Compare(expression, ops, cmps); + return Compare(expression, ops, cmps, LINENO(n)); } break; @@ -1522,11 +1533,11 @@ switch (TYPE(CHILD(n, 0))) { case PLUS: - return UnaryOp(UAdd, expression); + return UnaryOp(UAdd, expression, LINENO(n)); case MINUS: - return UnaryOp(USub, expression); + return UnaryOp(USub, expression, LINENO(n)); case TILDE: - return UnaryOp(Invert, expression); + return UnaryOp(Invert, expression, LINENO(n)); } break; } @@ -1548,7 +1559,7 @@ break; if (TYPE(CHILD(ch, 0)) == LPAR) { if (NCH(ch) == 2) - new = Call(new, NULL, NULL, NULL, NULL); + new = Call(new, NULL, NULL, NULL, NULL, LINENO(ch)); else new = ast_for_call(c, CHILD(ch, 1), new); @@ -1567,7 +1578,7 @@ return NULL; } - new = Subscript(e, slc, Load); + new = Subscript(e, slc, Load, LINENO(ch)); if (!new) { /* XXX free(e); */ /* XXX free(slc); */ @@ -1592,7 +1603,7 @@ } asdl_seq_SET(slices, j / 2, slc); } - new = Subscript(e, ExtSlice(slices), Load); + new = Subscript(e, ExtSlice(slices), Load, LINENO(ch)); if (!new) { /* XXX free(e); */ asdl_seq_free(slices); @@ -1602,7 +1613,8 @@ } else { assert(TYPE(CHILD(ch, 0)) == DOT); - new = Attribute(e, NEW_IDENTIFIER(CHILD(ch, 1)), Load); + new = Attribute(e, NEW_IDENTIFIER(CHILD(ch, 1)), Load, + LINENO(ch)); if (!new) { /* XXX free(e); */ return NULL; @@ -1616,7 +1628,7 @@ /* XXX free(e); */ return NULL; } - return BinOp(e, Pow, f); + return BinOp(e, Pow, f, LINENO(n)); } return e; } @@ -1659,9 +1671,16 @@ } } if (ngens > 1 || (ngens && (nargs || 
nkeywords))) { - ast_error(n, "Generator expression must be parenthesised if not sole argument"); + ast_error(n, "Generator expression must be parenthesised " + "if not sole argument"); return NULL; } + + if (nargs + nkeywords + ngens > 255) { + ast_error(n, "more than 255 arguments"); + return NULL; + } + args = asdl_seq_new(nargs + ngens); if (!args) goto error; @@ -1727,8 +1746,7 @@ } } - /* XXX syntax error if more than 255 arguments */ - return Call(func, args, keywords, vararg, kwarg); + return Call(func, args, keywords, vararg, kwarg, LINENO(n)); error: if (args) @@ -1751,7 +1769,7 @@ if (!tmp) return NULL; - return Tuple(tmp, Load); + return Tuple(tmp, Load, LINENO(n)); } } @@ -1816,26 +1834,20 @@ for (i = 0; i < NCH(n) - 2; i += 2) { expr_ty e = ast_for_testlist(c, CHILD(n, i)); - if (e->kind == GeneratorExp_kind) { - ast_error(CHILD(n, i), - "assignment to generator expression not possible"); - asdl_seq_free(targets); - return NULL; - } - /* set context to assign */ - if (!e) { - asdl_seq_free(targets); - return NULL; - } - if (!set_context(e, Store, CHILD(n, i))) { - asdl_seq_free(targets); - return NULL; - } + if (!e) + goto error; + + if (!set_context(e, Store, CHILD(n, i))) + goto error; + asdl_seq_SET(targets, i / 2, e); } expression = ast_for_testlist(c, CHILD(n, NCH(n) - 1)); return Assign(targets, expression, LINENO(n)); + error: + asdl_seq_free(targets); + return NULL; } return NULL; } @@ -2499,7 +2511,7 @@ asdl_seq_free(_target); } else - target = Tuple(_target, Store); + target = Tuple(_target, Store, LINENO(n)); expression = ast_for_testlist(c, CHILD(n, 3)); if (!expression) Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.115 retrieving revision 1.1.2.116 diff -u -d -r1.1.2.115 -r1.1.2.116 --- newcompile.c 11 Oct 2005 22:54:03 -0000 1.1.2.115 +++ newcompile.c 12 Oct 2005 03:18:07 -0000 1.1.2.116 @@ -3463,7 
+3463,7 @@ switch (e->kind) { case Attribute_kind: auge = Attribute(e->v.Attribute.value, e->v.Attribute.attr, - AugLoad); + AugLoad, e->lineno); if (auge == NULL) return 0; VISIT(c, expr, auge); @@ -3475,7 +3475,7 @@ break; case Subscript_kind: auge = Subscript(e->v.Subscript.value, e->v.Subscript.slice, - AugLoad); + AugLoad, e->lineno); if (auge == NULL) return 0; VISIT(c, expr, auge); From nnorwitz at gmail.com Wed Oct 12 05:50:53 2005 From: nnorwitz at gmail.com (Neal Norwitz) Date: Tue, 11 Oct 2005 20:50:53 -0700 Subject: [Python-checkins] python/dist/src/Objects unicodeobject.c, 2.231, 2.232 In-Reply-To: <20051006203001.283881E4003@bag.python.org> References: <20051006203001.283881E4003@bag.python.org> Message-ID: I don't have a problem with this checkin, but in reviewing it I noticed something. It seems that the exceptionObject (exc) is returned from unicode_decode_call_errorhandler(), but it is never used other than calling Py_XDECREF(exc). It looked like goto onError always followed a failure of unicode_decode_call_errorhandler(). Is this the case? Can exceptionObject be removed? I didn't look at any other parameter to determine if each was necessary. I didn't study it long enough to be sure exc can be removed, but it sure looked like it from a moderate inspection. If we could simplify this code it would be nice. n -- On 10/6/05, doerwalter at users.sourceforge.net wrote: > Update of /cvsroot/python/python/dist/src/Objects > In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6122/Objects > > Modified Files: > unicodeobject.c > Log Message: > Part of SF patch #1313939: Speedup charmap decoding by extending > PyUnicode_DecodeCharmap() the accept a unicode string as the mapping > argument which is used as a mapping table. > > This code isn't used by any of the codecs yet. 
> > > Index: unicodeobject.c > =================================================================== > RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v > retrieving revision 2.231 > retrieving revision 2.232 > diff -u -d -r2.231 -r2.232 > --- unicodeobject.c 30 Aug 2005 10:23:14 -0000 2.231 > +++ unicodeobject.c 6 Oct 2005 20:29:57 -0000 2.232 > @@ -2833,6 +2833,8 @@ > int extrachars = 0; > PyObject *errorHandler = NULL; > PyObject *exc = NULL; > + Py_UNICODE *mapstring = NULL; > + int maplen = 0; > > /* Default to Latin-1 */ > if (mapping == NULL) > @@ -2845,91 +2847,121 @@ > return (PyObject *)v; > p = PyUnicode_AS_UNICODE(v); > e = s + size; > - while (s < e) { > - unsigned char ch = *s; > - PyObject *w, *x; > + if (PyUnicode_CheckExact(mapping)) { > + mapstring = PyUnicode_AS_UNICODE(mapping); > + maplen = PyUnicode_GET_SIZE(mapping); > + while (s < e) { > + unsigned char ch = *s; > + Py_UNICODE x = 0xfffe; /* illegal value */ > > - /* Get mapping (char ordinal -> integer, Unicode char or None) */ > - w = PyInt_FromLong((long)ch); > - if (w == NULL) > - goto onError; > - x = PyObject_GetItem(mapping, w); > - Py_DECREF(w); > - if (x == NULL) { > - if (PyErr_ExceptionMatches(PyExc_LookupError)) { > - /* No mapping found means: mapping is undefined. 
*/ > - PyErr_Clear(); > - x = Py_None; > - Py_INCREF(x); > - } else > - goto onError; > - } > + if (ch < maplen) > + x = mapstring[ch]; > > - /* Apply mapping */ > - if (PyInt_Check(x)) { > - long value = PyInt_AS_LONG(x); > - if (value < 0 || value > 65535) { > - PyErr_SetString(PyExc_TypeError, > - "character mapping must be in range(65536)"); > - Py_DECREF(x); > - goto onError; > + if (x == 0xfffe) { > + /* undefined mapping */ > + outpos = p-PyUnicode_AS_UNICODE(v); > + startinpos = s-starts; > + endinpos = startinpos+1; > + if (unicode_decode_call_errorhandler( > + errors, &errorHandler, > + "charmap", "character maps to <undefined>", > + starts, size, &startinpos, &endinpos, &exc, &s, > + (PyObject **)&v, &outpos, &p)) { > + goto onError; > + } > + continue; > } > - *p++ = (Py_UNICODE)value; > + *p++ = x; > + ++s; > } > - else if (x == Py_None) { > - /* undefined mapping */ > - outpos = p-PyUnicode_AS_UNICODE(v); > - startinpos = s-starts; > - endinpos = startinpos+1; > - if (unicode_decode_call_errorhandler( > - errors, &errorHandler, > - "charmap", "character maps to <undefined>", > - starts, size, &startinpos, &endinpos, &exc, &s, > - (PyObject **)&v, &outpos, &p)) { > - Py_DECREF(x); > + } > + else { > + while (s < e) { > + unsigned char ch = *s; > + PyObject *w, *x; > + > + /* Get mapping (char ordinal -> integer, Unicode char or None) */ > + w = PyInt_FromLong((long)ch); > + if (w == NULL) > goto onError; > + x = PyObject_GetItem(mapping, w); > + Py_DECREF(w); > + if (x == NULL) { > + if (PyErr_ExceptionMatches(PyExc_LookupError)) { > + /* No mapping found means: mapping is undefined.
*/ > + PyErr_Clear(); > + x = Py_None; > + Py_INCREF(x); > + } else > + goto onError; > } > - continue; > - } > - else if (PyUnicode_Check(x)) { > - int targetsize = PyUnicode_GET_SIZE(x); > - > - if (targetsize == 1) > - /* 1-1 mapping */ > - *p++ = *PyUnicode_AS_UNICODE(x); > - > - else if (targetsize > 1) { > - /* 1-n mapping */ > - if (targetsize > extrachars) { > - /* resize first */ > - int oldpos = (int)(p - PyUnicode_AS_UNICODE(v)); > - int needed = (targetsize - extrachars) + \ > - (targetsize << 2); > - extrachars += needed; > - if (_PyUnicode_Resize(&v, > - PyUnicode_GET_SIZE(v) + needed) < 0) { > - Py_DECREF(x); > - goto onError; > + > + /* Apply mapping */ > + if (PyInt_Check(x)) { > + long value = PyInt_AS_LONG(x); > + if (value < 0 || value > 65535) { > + PyErr_SetString(PyExc_TypeError, > + "character mapping must be in range(65536)"); > + Py_DECREF(x); > + goto onError; > + } > + *p++ = (Py_UNICODE)value; > + } > + else if (x == Py_None) { > + /* undefined mapping */ > + outpos = p-PyUnicode_AS_UNICODE(v); > + startinpos = s-starts; > + endinpos = startinpos+1; > + if (unicode_decode_call_errorhandler( > + errors, &errorHandler, > + "charmap", "character maps to <undefined>", > + starts, size, &startinpos, &endinpos, &exc, &s, > + (PyObject **)&v, &outpos, &p)) { > + Py_DECREF(x); > + goto onError; > + } > + continue; > + } > + else if (PyUnicode_Check(x)) { > + int targetsize = PyUnicode_GET_SIZE(x); > + > + if (targetsize == 1) > + /* 1-1 mapping */ > + *p++ = *PyUnicode_AS_UNICODE(x); > + > + else if (targetsize > 1) { > + /* 1-n mapping */ > + if (targetsize > extrachars) { > + /* resize first */ > + int oldpos = (int)(p - PyUnicode_AS_UNICODE(v)); > + int needed = (targetsize - extrachars) + \ > + (targetsize << 2); > + extrachars += needed; > + if (_PyUnicode_Resize(&v, > + PyUnicode_GET_SIZE(v) + needed) < 0) { > + Py_DECREF(x); > + goto onError; > + } > + p = PyUnicode_AS_UNICODE(v) + oldpos; > } > - p = PyUnicode_AS_UNICODE(v) + oldpos; > +
Py_UNICODE_COPY(p, > + PyUnicode_AS_UNICODE(x), > + targetsize); > + p += targetsize; > + extrachars -= targetsize; > } > - Py_UNICODE_COPY(p, > - PyUnicode_AS_UNICODE(x), > - targetsize); > - p += targetsize; > - extrachars -= targetsize; > + /* 1-0 mapping: skip the character */ > + } > + else { > + /* wrong return value */ > + PyErr_SetString(PyExc_TypeError, > + "character mapping must return integer, None or unicode"); > + Py_DECREF(x); > + goto onError; > } > - /* 1-0 mapping: skip the character */ > - } > - else { > - /* wrong return value */ > - PyErr_SetString(PyExc_TypeError, > - "character mapping must return integer, None or unicode"); > Py_DECREF(x); > - goto onError; > + ++s; > } > - Py_DECREF(x); > - ++s; > } > if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) > if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0) > > _______________________________________________ > Python-checkins mailing list > Python-checkins at python.org > http://mail.python.org/mailman/listinfo/python-checkins > From nnorwitz at users.sourceforge.net Wed Oct 12 05:58:17 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Wed, 12 Oct 2005 05:58:17 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/api abstract.tex,1.35,1.36 Message-ID: <20051012035817.956951E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/api In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19550/Doc/api Modified Files: abstract.tex Log Message: SF bug #1323739, C API doc for PySequence_Tuple duplicated Index: abstract.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/abstract.tex,v retrieving revision 1.35 retrieving revision 1.36 diff -u -d -r1.35 -r1.36 --- abstract.tex 7 Aug 2004 17:53:05 -0000 1.35 +++ abstract.tex 12 Oct 2005 03:58:14 -0000 1.36 @@ -778,12 +778,6 @@ the Python statement \samp{del \var{o}[\var{i1}:\var{i2}]}. 
\end{cfuncdesc} -\begin{cfuncdesc}{PyObject*}{PySequence_Tuple}{PyObject *o} - Returns the \var{o} as a tuple on success, and \NULL{} on failure. - This is equivalent to the Python expression \samp{tuple(\var{o})}. - \bifuncindex{tuple} -\end{cfuncdesc} - \begin{cfuncdesc}{int}{PySequence_Count}{PyObject *o, PyObject *value} Return the number of occurrences of \var{value} in \var{o}, that is, return the number of keys for which \code{\var{o}[\var{key}] == @@ -811,9 +805,11 @@ \begin{cfuncdesc}{PyObject*}{PySequence_Tuple}{PyObject *o} Return a tuple object with the same contents as the arbitrary - sequence \var{o}. If \var{o} is a tuple, a new reference will be - returned, otherwise a tuple will be constructed with the appropriate - contents. + sequence \var{o} or \NULL{} on failure. If \var{o} is a tuple, + a new reference will be returned, otherwise a tuple will be + constructed with the appropriate contents. This is equivalent + to the Python expression \samp{tuple(\var{o})}. + \bifuncindex{tuple} \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PySequence_Fast}{PyObject *o, const char *m} From nnorwitz at users.sourceforge.net Wed Oct 12 06:00:00 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Wed, 12 Oct 2005 06:00:00 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/api abstract.tex, 1.35, 1.35.2.1 Message-ID: <20051012040000.17F141E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/api In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19923/Doc/api Modified Files: Tag: release24-maint abstract.tex Log Message: Backport: SF bug #1323739, C API doc for PySequence_Tuple duplicated Index: abstract.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/abstract.tex,v retrieving revision 1.35 retrieving revision 1.35.2.1 diff -u -d -r1.35 -r1.35.2.1 --- abstract.tex 7 Aug 2004 17:53:05 -0000 1.35 +++ abstract.tex 12 Oct 2005 03:59:56 
-0000 1.35.2.1 @@ -778,12 +778,6 @@ the Python statement \samp{del \var{o}[\var{i1}:\var{i2}]}. \end{cfuncdesc} -\begin{cfuncdesc}{PyObject*}{PySequence_Tuple}{PyObject *o} - Returns the \var{o} as a tuple on success, and \NULL{} on failure. - This is equivalent to the Python expression \samp{tuple(\var{o})}. - \bifuncindex{tuple} -\end{cfuncdesc} - \begin{cfuncdesc}{int}{PySequence_Count}{PyObject *o, PyObject *value} Return the number of occurrences of \var{value} in \var{o}, that is, return the number of keys for which \code{\var{o}[\var{key}] == @@ -811,9 +805,11 @@ \begin{cfuncdesc}{PyObject*}{PySequence_Tuple}{PyObject *o} Return a tuple object with the same contents as the arbitrary - sequence \var{o}. If \var{o} is a tuple, a new reference will be - returned, otherwise a tuple will be constructed with the appropriate - contents. + sequence \var{o} or \NULL{} on failure. If \var{o} is a tuple, + a new reference will be returned, otherwise a tuple will be + constructed with the appropriate contents. This is equivalent + to the Python expression \samp{tuple(\var{o})}. + \bifuncindex{tuple} \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PySequence_Fast}{PyObject *o, const char *m} From walter at livinglogic.de Wed Oct 12 09:40:54 2005 From: walter at livinglogic.de (=?ISO-8859-1?Q?Walter_D=F6rwald?=) Date: Wed, 12 Oct 2005 09:40:54 +0200 Subject: [Python-checkins] python/dist/src/Objects unicodeobject.c, 2.231, 2.232 In-Reply-To: References: <20051006203001.283881E4003@bag.python.org> Message-ID: <3982ACF8-A08B-4EB0-97FA-C98367156530@livinglogic.de> Am 12.10.2005 um 05:50 schrieb Neal Norwitz: > I don't have a problem with this checkin, but in reviewing it I > noticed something. It seems that the exceptionObject (exc) is > returned from unicode_decode_call_errorhandler(), but it is never used > other than calling Py_XDECREF(exc). It looked like goto onError > always followed a failure of unicode_decode_call_errorhandler(). 
It does, but when unicode_decode_call_errorhandler() *doesn't* fail, it might be called a second time later on. For performance reasons unicode_decode_call_errorhandler() then doesn't create a new exception object to be passed to the error handler, but reuses the old one (the PEP explicitly allows this). > Is this the case? Can exceptionObject be removed? I didn't look at > any other parameter to determine if each was necessary. I didn't > study it long enough to be sure exc can be removed, but it sure looked > like it from a moderate inspection. > > If we could simplify this code it would be nice. Yes, but I don't see how. Bye, Walter Dörwald From marketing at omtel.co.za Wed Oct 12 16:07:40 2005 From: marketing at omtel.co.za (Omtel) Date: Wed, 12 Oct 2005 16:07:40 +0200 Subject: [Python-checkins] Your registration at OMTEL Message-ID: <614e80ac5bd11bb9831dd4271fca831e@www.mailerdemon.co.za> Many thanks for your inquiry on OMTEL You want to reduce considerable your phone costs with OMTEL. You want to phone free of charge? You want to open a callshop? You want to represent our company in your country? You get all information about on our homepage http://www.omtel.de Register yourselve on our registration area and you will get all further information. We are available for further inquiries under marketing at omtel.co.za Yours sincerely Your OMTEL team This is a automatic generated e-mail. Should this e-mail not be wished by you or by a mistake reached you, please inform us. 
From akuchling at users.sourceforge.net Wed Oct 12 18:08:32 2005 From: akuchling at users.sourceforge.net (akuchling@users.sourceforge.net) Date: Wed, 12 Oct 2005 18:08:32 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep2html.py,1.53,1.54 Message-ID: <20051012160832.3993E1E4002@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24750 Modified Files: pep2html.py Log Message: Use new www server hostname Index: pep2html.py =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep2html.py,v retrieving revision 1.53 retrieving revision 1.54 diff -u -d -r1.53 -r1.54 --- pep2html.py 19 Jul 2004 19:05:01 -0000 1.53 +++ pep2html.py 12 Oct 2005 16:08:28 -0000 1.54 @@ -23,7 +23,7 @@ -l, --local Same as -i/--install, except install on the local machine. Use this - when logged in to the python.org machine (creosote). + when logged in to the python.org machine (dinsdale). -q, --quiet Turn off verbose messages. @@ -54,7 +54,7 @@ PEPDIRRUL = 'http://www.python.org/peps/' -HOST = "www.python.org" # host for update +HOST = "dinsdale.python.org" # host for update HDIR = "/ftp/ftp.python.org/pub/www.python.org/peps" # target host directory LOCALVARS = "Local Variables:" From nascheme at users.sourceforge.net Thu Oct 13 01:36:14 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Thu, 13 Oct 2005 01:36:14 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.116, 1.1.2.117 Message-ID: <20051012233614.9B4A81E4110@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16154/Python Modified Files: Tag: ast-branch newcompile.c Log Message: Update u_lineno when generating code for expressions. 
Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.116 retrieving revision 1.1.2.117 diff -u -d -r1.1.2.116 -r1.1.2.117 --- newcompile.c 12 Oct 2005 03:18:07 -0000 1.1.2.116 +++ newcompile.c 12 Oct 2005 23:36:09 -0000 1.1.2.117 @@ -3344,6 +3344,11 @@ { int i, n; + fprintf(stderr, "compile expr %d lineno %d %d\n", e->kind, e->lineno); + if (e->lineno > c->u->u_lineno) { + c->u->u_lineno = e->lineno; + c->u->u_lineno_set = false; + } switch (e->kind) { case BoolOp_kind: return compiler_boolop(c, e); From nascheme at users.sourceforge.net Thu Oct 13 02:32:48 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Thu, 13 Oct 2005 02:32:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.117, 1.1.2.118 Message-ID: <20051013003248.37C7F1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30515/Python Modified Files: Tag: ast-branch newcompile.c Log Message: Move comment to correct location. Fix a bug in a debug print statement. Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.117 retrieving revision 1.1.2.118 diff -u -d -r1.1.2.117 -r1.1.2.118 --- newcompile.c 12 Oct 2005 23:36:09 -0000 1.1.2.117 +++ newcompile.c 13 Oct 2005 00:32:45 -0000 1.1.2.118 @@ -1414,10 +1414,6 @@ return b->b_iused++; } -/* Add an opcode with no argument. - Returns 0 on failure, 1 on success. -*/ - static void compiler_set_lineno(struct compiler *c, int off) { @@ -1646,6 +1642,10 @@ return 0; /* not reachable */ } +/* Add an opcode with no argument. + Returns 0 on failure, 1 on success. 
+*/ + static int compiler_addop(struct compiler *c, int opcode) { @@ -3344,7 +3344,7 @@ { int i, n; - fprintf(stderr, "compile expr %d lineno %d %d\n", e->kind, e->lineno); + fprintf(stderr, "compile expr %d lineno %d\n", e->kind, e->lineno); if (e->lineno > c->u->u_lineno) { c->u->u_lineno = e->lineno; c->u->u_lineno_set = false; From nascheme at users.sourceforge.net Thu Oct 13 06:46:02 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Thu, 13 Oct 2005 06:46:02 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.118, 1.1.2.119 Message-ID: <20051013044602.28A711E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12137/Python Modified Files: Tag: ast-branch newcompile.c Log Message: The line number for the first instruction of a unit may not be co_firstlineno. Fix that. Also, set firstlineno properly for Lambda and GenExpr nodes. Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.118 retrieving revision 1.1.2.119 diff -u -d -r1.1.2.118 -r1.1.2.119 --- newcompile.c 13 Oct 2005 00:32:45 -0000 1.1.2.118 +++ newcompile.c 13 Oct 2005 04:45:58 -0000 1.1.2.119 @@ -2150,7 +2150,7 @@ if (args->defaults) VISIT_SEQ(c, expr, args->defaults); - if (!compiler_enter_scope(c, name, (void *)e, c->u->u_lineno)) + if (!compiler_enter_scope(c, name, (void *)e, e->lineno)) return 0; /* unpack nested arguments */ @@ -3293,7 +3293,7 @@ if (!name) return 0; - if (!compiler_enter_scope(c, name, (void *)e, c->u->u_lineno)) + if (!compiler_enter_scope(c, name, (void *)e, e->lineno)) return 0; compiler_genexp_generator(c, e->v.GeneratorExp.generators, 0, e->v.GeneratorExp.elt); @@ -3797,10 +3797,10 @@ } static int -assemble_init(struct assembler *a, int nblocks) +assemble_init(struct assembler *a, int nblocks, int firstlineno) 
{ memset(a, 0, sizeof(struct assembler)); - a->a_lineno = 1; + a->a_lineno = firstlineno; a->a_bytecode = PyString_FromStringAndSize(NULL, DEFAULT_CODE_SIZE); if (!a->a_bytecode) return 0; @@ -3987,7 +3987,7 @@ } else { /* First line of a block; def stmt, etc. */ *lnotab++ = 0; - *lnotab++ = 1; + *lnotab++ = d_lineno; } a->a_lineno = i->i_lineno; a->a_lineno_off = a->a_offset; @@ -4226,7 +4226,7 @@ entryblock = b; } - if (!assemble_init(&a, nblocks)) + if (!assemble_init(&a, nblocks, c->u->u_firstlineno)) goto error; dfs(c, entryblock, &a); From kzl665544 at eyou.com Thu Oct 13 08:56:47 2005 From: kzl665544 at eyou.com (Paul) Date: Thu, 13 Oct 2005 14:56:47 +0800 Subject: [Python-checkins] Marketing for you Message-ID: <20051013065644.25D651E4002@bag.python.org> Dear python-checkins at python.org: We specializing in E-mail Marketing. 1. Targeted list We may provide targeted email list. We will customize the list according to your requirements. 2. Sending out Targeted Marketing for you We may send your email message to your target clients! We will customize your email list and send your message for you. * We offer BP web Hosting & mail server. Regards! Paul Marketing Team kezunli321 at yeah.net To Bye: PaulNo at hotmail.com From jhylton at users.sourceforge.net Thu Oct 13 17:42:44 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 13 Oct 2005 17:42:44 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c,1.1.2.66,1.1.2.67 Message-ID: <20051013154244.790711E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13934/Python Modified Files: Tag: ast-branch ast.c Log Message: Fix two separate problems. Generate expected error for assignments to literals, e.g. 1 = 1 or (1,) = 1. Fix assignment of lineno for FromImport statements: Use the line number of the module being imported, not of the names being bound from it. Fixes test_future. 
Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/ast.c,v retrieving revision 1.1.2.66 retrieving revision 1.1.2.67 diff -u -d -r1.1.2.66 -r1.1.2.67 --- ast.c 12 Oct 2005 03:18:07 -0000 1.1.2.66 +++ ast.c 13 Oct 2005 15:42:40 -0000 1.1.2.67 @@ -392,10 +392,20 @@ case GeneratorExp_kind: return ast_error(n, "assignment to generator expression " "not possible"); - default: - return ast_error(n, "unexpected node in assignment"); - break; + case Num_kind: + case Str_kind: + return ast_error(n, "can't assign to literal"); + default: { + char buf[300]; + PyOS_snprintf(buf, sizeof(buf), + "unexpected expression in assignment %d (line %d)", + e->kind, e->lineno); + return ast_error(n, buf); + } } + /* If the LHS is a list or tuple, we need to set the assignment + context for all the tuple elements. + */ if (s) { int i; @@ -2112,6 +2122,7 @@ stmt_ty import; int n_children; const char *from_modules; + int lineno = LINENO(n); alias_ty mod = alias_for_import_name(CHILD(n, 1)); if (!mod) return NULL; @@ -2129,7 +2140,7 @@ } } else if (from_modules[0] == '*') { - n = CHILD(n,3); /* from ... import * */ + n = CHILD(n, 3); /* from ... import * */ } else if (from_modules[0] == '(') n = CHILD(n, 4); /* from ... 
import (x, y, z) */ @@ -2166,7 +2177,7 @@ } asdl_seq_APPEND(aliases, import_alias); } - import = ImportFrom(mod->name, aliases, LINENO(n)); + import = ImportFrom(mod->name, aliases, lineno); free(mod); return import; } From jhylton at users.sourceforge.net Thu Oct 13 18:11:19 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 13 Oct 2005 18:11:19 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_genexps.py, 1.7.4.1, 1.7.4.2 Message-ID: <20051013161119.128551E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22475/Lib/test Modified Files: Tag: ast-branch test_genexps.py Log Message: Track change to genexp syntax error string. Index: test_genexps.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_genexps.py,v retrieving revision 1.7.4.1 retrieving revision 1.7.4.2 diff -u -d -r1.7.4.1 -r1.7.4.2 --- test_genexps.py 7 Jan 2005 06:59:09 -0000 1.7.4.1 +++ test_genexps.py 13 Oct 2005 16:11:15 -0000 1.7.4.2 @@ -125,12 +125,13 @@ >>> (y for y in (1,2)) = 10 Traceback (most recent call last): ... - SyntaxError: assign to generator expression not possible + SyntaxError: assignment to generator expression not possible (, line 1) >>> (y for y in (1,2)) += 10 Traceback (most recent call last): ... 
- SyntaxError: augmented assign to tuple literal or generator expression not possible + SyntaxError: augmented assignment to generator expression not possible (, line 1) + From nascheme at users.sourceforge.net Thu Oct 13 18:58:43 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Thu, 13 Oct 2005 18:58:43 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_trace.py, 1.7.10.2, 1.7.10.3 Message-ID: <20051013165843.209021E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv679/Lib/test Modified Files: Tag: ast-branch test_trace.py Log Message: Disable tests that cause the interpreter to crash. I think frame_setlineno() needs to be fixed. Index: test_trace.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_trace.py,v retrieving revision 1.7.10.2 retrieving revision 1.7.10.3 diff -u -d -r1.7.10.2 -r1.7.10.3 --- test_trace.py 7 Jan 2005 06:59:13 -0000 1.7.10.2 +++ test_trace.py 13 Oct 2005 16:58:39 -0000 1.7.10.3 @@ -576,14 +576,17 @@ self.run_test(no_jump_too_far_forwards) def test_09_no_jump_too_far_backwards(self): self.run_test(no_jump_too_far_backwards) - def test_10_no_jump_to_except_1(self): - self.run_test(no_jump_to_except_1) - def test_11_no_jump_to_except_2(self): - self.run_test(no_jump_to_except_2) - def test_12_no_jump_to_except_3(self): - self.run_test(no_jump_to_except_3) - def test_13_no_jump_to_except_4(self): - self.run_test(no_jump_to_except_4) +# XXX: These tests cause the interpreter to crash. The frame_setlineno() +# function no longer works correctly because the lineno table generated by +# the AST compiler is slightly different than with the old compiler. 
+# def test_10_no_jump_to_except_1(self): +# self.run_test(no_jump_to_except_1) +# def test_11_no_jump_to_except_2(self): +# self.run_test(no_jump_to_except_2) +# def test_12_no_jump_to_except_3(self): +# self.run_test(no_jump_to_except_3) +# def test_13_no_jump_to_except_4(self): +# self.run_test(no_jump_to_except_4) def test_14_no_jump_forwards_into_block(self): self.run_test(no_jump_forwards_into_block) def test_15_no_jump_backwards_into_block(self): From gvanrossum at users.sourceforge.net Thu Oct 13 23:04:18 2005 From: gvanrossum at users.sourceforge.net (gvanrossum@users.sourceforge.net) Date: Thu, 13 Oct 2005 23:04:18 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep2html.py,1.54,1.55 Message-ID: <20051013210418.888F81E4002@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27627 Modified Files: pep2html.py Log Message: The root of the tree on dinsdale is /data, not /ftp. Index: pep2html.py =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep2html.py,v retrieving revision 1.54 retrieving revision 1.55 diff -u -d -r1.54 -r1.55 --- pep2html.py 12 Oct 2005 16:08:28 -0000 1.54 +++ pep2html.py 13 Oct 2005 21:04:15 -0000 1.55 @@ -55,7 +55,7 @@ HOST = "dinsdale.python.org" # host for update -HDIR = "/ftp/ftp.python.org/pub/www.python.org/peps" # target host directory +HDIR = "/data/ftp.python.org/pub/www.python.org/peps" # target host directory LOCALVARS = "Local Variables:" COMMENT = """", self.name if self.condition: Output() @@ -50,6 +60,8 @@ OutRbrace() def reference(self, name = None): + if not self.checkgenerate(): + return if name is None: name = self.name docstring = self.docstring() @@ -175,17 +187,8 @@ arg.declare() def getargs(self): - fmt = "" - lst = "" sep = ",\n" + ' '*len("if (!PyArg_ParseTuple(") - for arg in self.argumentList: - if arg.flags == SelfMode: - continue - if arg.mode in (InMode, InOutMode): - 
fmt = fmt + arg.getargsFormat() - args = arg.getargsArgs() - if args: - lst = lst + sep + args + fmt, lst = self.getargsFormatArgs(sep) Output("if (!PyArg_ParseTuple(_args, \"%s\"%s))", fmt, lst) IndentLevel() Output("return NULL;") @@ -196,15 +199,32 @@ if arg.mode in (InMode, InOutMode): arg.getargsCheck() + def getargsFormatArgs(self, sep): + fmt = "" + lst = "" + for arg in self.argumentList: + if arg.flags == SelfMode: + continue + if arg.mode in (InMode, InOutMode): + arg.getargsPreCheck() + fmt = fmt + arg.getargsFormat() + args = arg.getargsArgs() + if args: + lst = lst + sep + args + return fmt, lst + def precheck(self): pass + def beginallowthreads(self): + pass + + def endallowthreads(self): + pass + def callit(self): args = "" - if self.rv: - s = "%s = %s(" % (self.rv.name, self.name) - else: - s = "%s(" % self.name + s = "%s%s(" % (self.getrvforcallit(), self.callname) sep = ",\n" + ' '*len(s) for arg in self.argumentList: if arg is self.rv: @@ -212,26 +232,24 @@ s = arg.passArgument() if args: s = sep + s args = args + s + self.beginallowthreads() + Output("%s%s(%s);", + self.getrvforcallit(), self.callname, args) + self.endallowthreads() + + def getrvforcallit(self): if self.rv: - Output("%s = %s(%s);", - self.rv.name, self.name, args) + return "%s = " % self.rv.name else: - Output("%s(%s);", self.name, args) + return "" def checkit(self): for arg in self.argumentList: arg.errorCheck() def returnvalue(self): - fmt = "" - lst = "" sep = ",\n" + ' '*len("return Py_BuildValue(") - for arg in self.argumentList: - if not arg: continue - if arg.flags == ErrorMode: continue - if arg.mode in (OutMode, InOutMode): - fmt = fmt + arg.mkvalueFormat() - lst = lst + sep + arg.mkvalueArgs() + fmt, lst = self.mkvalueFormatArgs(sep) if fmt == "": Output("Py_INCREF(Py_None);") Output("_res = Py_None;"); @@ -244,6 +262,17 @@ arg.cleanup() Output("return _res;") + def mkvalueFormatArgs(self, sep): + fmt = "" + lst = "" + for arg in self.argumentList: + if not arg: 
continue + if arg.flags == ErrorMode: continue + if arg.mode in (OutMode, InOutMode): + arg.mkvaluePreCheck() + fmt = fmt + arg.mkvalueFormat() + lst = lst + sep + arg.mkvalueArgs() + return fmt, lst class MethodGenerator(FunctionGenerator): @@ -256,7 +285,6 @@ self.argumentList.append(self.itself) FunctionGenerator.parseArgumentList(self, args) - def _test(): void = None eggs = FunctionGenerator(void, "eggs", Index: bgenHeapBuffer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/bgen/bgen/bgenHeapBuffer.py,v retrieving revision 1.3.16.2 retrieving revision 1.3.16.3 diff -u -d -r1.3.16.2 -r1.3.16.3 --- bgenHeapBuffer.py 7 Jan 2005 07:05:04 -0000 1.3.16.2 +++ bgenHeapBuffer.py 16 Oct 2005 05:24:06 -0000 1.3.16.3 @@ -16,8 +16,10 @@ def __init__(self, datatype = 'char', sizetype = 'int', sizeformat = None): FixedInputOutputBufferType.__init__(self, "0", datatype, sizetype, sizeformat) - def declareOutputBuffer(self, name): - Output("%s *%s__out__;", self.datatype, name) + def getOutputBufferDeclarations(self, name, constmode=False): + if constmode: + raise RuntimeError, "Cannot use const output buffer" + return ["%s *%s__out__" % (self.datatype, name)] def getargsCheck(self, name): Output("if ((%s__out__ = malloc(%s__in_len__)) == NULL)", name, name) @@ -74,8 +76,8 @@ Call from Python with buffer size. """ - def declareInputBuffer(self, name): - pass + def getInputBufferDeclarations(self, name, constmode=False): + return [] def getargsFormat(self): return "i" @@ -109,3 +111,32 @@ def passOutput(self, name): return "%s__out__, %s__len__, &%s__len__" % (name, name, name) + +class MallocHeapOutputBufferType(HeapOutputBufferType): + """Output buffer allocated by the called function -- passed as (&buffer, &size). + + Instantiate without parameters. + Call from Python without parameters. 
+ """ + + def getargsCheck(self, name): + Output("%s__out__ = NULL;", name) + + def getAuxDeclarations(self, name): + return [] + + def passOutput(self, name): + return "&%s__out__, &%s__len__" % (name, name) + + def getargsFormat(self): + return "" + + def getargsArgs(self, name): + return None + + def mkvalueFormat(self): + return "z#" + + def cleanup(self, name): + Output("if( %s__out__ ) free(%s__out__);", name, name) + Index: bgenObjectDefinition.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/bgen/bgen/bgenObjectDefinition.py,v retrieving revision 1.16.2.2 retrieving revision 1.16.2.3 diff -u -d -r1.16.2.2 -r1.16.2.3 --- bgenObjectDefinition.py 7 Jan 2005 07:05:04 -0000 1.16.2.2 +++ bgenObjectDefinition.py 16 Oct 2005 05:24:06 -0000 1.16.2.3 @@ -6,6 +6,7 @@ basechain = "NULL" tp_flags = "Py_TPFLAGS_DEFAULT" basetype = None + argref = "" # set to "*" if arg to _New should be pointer def __init__(self, name, prefix, itselftype): """ObjectDefinition constructor. May be extended, but do not override. 
@@ -22,7 +23,6 @@ self.itselftype = itselftype self.objecttype = name + 'Object' self.typename = name + '_Type' - self.argref = "" # set to "*" if arg to _New should be pointer self.static = "static " # set to "" to make _New and _Convert public self.modulename = None if hasattr(self, "assertions"): @@ -44,12 +44,8 @@ OutHeader2("Object type " + self.name) - sf = self.static and "static " - Output("%sPyTypeObject %s;", sf, self.typename) - Output() - Output("#define %s_Check(x) ((x)->ob_type == &%s || PyObject_TypeCheck((x), &%s))", - self.prefix, self.typename, self.typename) - Output() + self.outputCheck() + Output("typedef struct %s {", self.objecttype) IndentLevel() Output("PyObject_HEAD") @@ -84,6 +80,14 @@ OutHeader2("End object type " + self.name) + def outputCheck(self): + sf = self.static and "static " + Output("%sPyTypeObject %s;", sf, self.typename) + Output() + Output("#define %s_Check(x) ((x)->ob_type == &%s || PyObject_TypeCheck((x), &%s))", + self.prefix, self.typename, self.typename) + Output() + def outputMethodChain(self): Output("%sPyMethodChain %s_chain = { %s_methods, %s };", self.static, self.prefix, self.prefix, self.basechain) @@ -113,6 +117,7 @@ "Override this method to apply additional checks/conversions" def outputConvert(self): + Output() Output("%sint %s_Convert(PyObject *v, %s *p_itself)", self.static, self.prefix, self.itselftype) OutLbrace() @@ -214,6 +219,9 @@ Output("if (PyType_Ready(&%s) < 0) return;", self.typename) Output("""Py_INCREF(&%s);""", self.typename) Output("PyModule_AddObject(m, \"%s\", (PyObject *)&%s);", self.name, self.typename); + self.outputTypeObjectInitializerCompat() + + def outputTypeObjectInitializerCompat(self): Output("/* Backward-compatible name */") Output("""Py_INCREF(&%s);""", self.typename); Output("PyModule_AddObject(m, \"%sType\", (PyObject *)&%s);", self.name, self.typename); @@ -377,7 +385,7 @@ def output_tp_init(self): if self.output_tp_initBody: - Output("static int %s_tp_init(PyObject *self, 
PyObject *args, PyObject *kwds)", self.prefix) + Output("static int %s_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds)", self.prefix) OutLbrace() self.output_tp_initBody() OutRbrace() @@ -399,19 +407,19 @@ Output() def output_tp_newBody(self): - Output("PyObject *self;"); + Output("PyObject *_self;"); Output("%s itself;", self.itselftype); Output("char *kw[] = {\"itself\", 0};") Output() - Output("if (!PyArg_ParseTupleAndKeywords(args, kwds, \"O&\", kw, %s_Convert, &itself)) return NULL;", + Output("if (!PyArg_ParseTupleAndKeywords(_args, _kwds, \"O&\", kw, %s_Convert, &itself)) return NULL;", self.prefix); - Output("if ((self = type->tp_alloc(type, 0)) == NULL) return NULL;") - Output("((%s *)self)->ob_itself = itself;", self.objecttype) - Output("return self;") + Output("if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL;") + Output("((%s *)_self)->ob_itself = itself;", self.objecttype) + Output("return _self;") def output_tp_new(self): if self.output_tp_newBody: - Output("static PyObject *%s_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)", self.prefix) + Output("static PyObject *%s_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds)", self.prefix) OutLbrace() self.output_tp_newBody() OutRbrace() Index: bgenStackBuffer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/bgen/bgen/bgenStackBuffer.py,v retrieving revision 1.2.30.1 retrieving revision 1.2.30.2 diff -u -d -r1.2.30.1 -r1.2.30.2 --- bgenStackBuffer.py 28 Apr 2003 17:16:37 -0000 1.2.30.1 +++ bgenStackBuffer.py 16 Oct 2005 05:24:06 -0000 1.2.30.2 @@ -22,8 +22,11 @@ Instantiate with the buffer size as parameter. 
""" - def declareSize(self, name): - Output("int %s__len__ = %s;", name, self.size) + def getSizeDeclarations(self, name): + return [] + + def getAuxDeclarations(self, name): + return ["int %s__len__ = %s" % (name, self.size)] def passOutput(self, name): return "%s__out__, &%s__len__" % (name, name) Index: bgenStringBuffer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/bgen/bgen/bgenStringBuffer.py,v retrieving revision 1.1.30.2 retrieving revision 1.1.30.3 diff -u -d -r1.1.30.2 -r1.1.30.3 --- bgenStringBuffer.py 7 Jan 2005 07:05:04 -0000 1.1.30.2 +++ bgenStringBuffer.py 16 Oct 2005 05:24:06 -0000 1.1.30.3 @@ -23,8 +23,11 @@ less common. I'll write the classes when there is demand.) """ - def declareSize(self, name): - pass + def getSizeDeclarations(self, name): + return [] + + def getAuxDeclarations(self, name): + return [] def getargsFormat(self): return "s" Index: bgenType.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/bgen/bgen/bgenType.py,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- bgenType.py 7 Jan 2005 07:05:05 -0000 1.9.2.2 +++ bgenType.py 16 Oct 2005 05:24:06 -0000 1.9.2.3 @@ -19,12 +19,34 @@ self.typeName = typeName self.fmt = fmt - def declare(self, name): + def declare(self, name, reference=False): """Declare a variable of the type with a given name. 
Example: int.declare('spam') prints "int spam;" """ - Output("%s %s;", self.typeName, name) + for decl in self.getArgDeclarations(name, reference): + Output("%s;", decl) + for decl in self.getAuxDeclarations(name): + Output("%s;", decl) + + def getArgDeclarations(self, name, reference=False, constmode=False): + """Return the main part of the declarations for this type: the items + that will be passed as arguments in the C/C++ function call.""" + if reference: + ref = "&" + else: + ref = "" + if constmode: + const = "const " + else: + const = "" + return ["%s%s%s %s" % (const, self.typeName, ref, name)] + + def getAuxDeclarations(self, name): + """Return any auxiliary declarations needed for implementing this + type, such as helper variables used to hold sizes, etc. These declarations + are not part of the C/C++ function call interface.""" + return [] def getargs(self): return self.getargsFormat(), self.getargsArgs() @@ -44,11 +66,18 @@ """ return "&" + name + def getargsPreCheck(self, name): + """Perform any actions needed before calling getargs(). + + This could include declaring temporary variables and such. + """ + def getargsCheck(self, name): """Perform any needed post-[new]getargs() checks. This is type-dependent; the default does not check for errors. - An example would be a check for a maximum string length.""" + An example would be a check for a maximum string length, or it + could do post-getargs() copying or conversion.""" def passInput(self, name): """Return an argument for passing a variable into a call. @@ -64,6 +93,12 @@ """ return "&" + name + def passReference(self, name): + """Return an argument for C++ pass-by-reference. + Default is to call passInput(). + """ + return self.passInput(name) + def errorCheck(self, name): """Check for an error returned in the variable. @@ -96,6 +131,12 @@ """ return name + def mkvaluePreCheck(self, name): + """Perform any actions needed before calling mkvalue(). 
+ + This could include declaring temporary variables and such. + """ + def cleanup(self, name): """Clean up if necessary. @@ -172,8 +213,11 @@ self.substitute = substitute self.typeName = None # Don't show this argument in __doc__ string - def declare(self, name): - pass + def getArgDeclarations(self, name, reference=False, constmode=False): + return [] + + def getAuxDeclarations(self, name, reference=False): + return [] def getargsFormat(self): return "" @@ -237,6 +281,25 @@ def mkvalueArgs(self, name): return "%s, %s" % (self.new, name) +class OpaqueByRefType(OpaqueType): + """An opaque object type, passed by reference. + + Instantiate with the type name, and optionally an object type name whose + New/Convert functions will be used. + """ + + def passInput(self, name): + return name + +# def passOutput(self, name): +# return name + + def mkvalueFormat(self): + return "O" + + def mkvalueArgs(self, name): + return "%s(%s)" % (self.new, name) + class OpaqueByValueStructType(OpaqueByValueType): """Similar to OpaqueByValueType, but we also pass this to mkvalue by address, in stead of by value. Index: bgenVariable.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/bgen/bgen/bgenVariable.py,v retrieving revision 1.1.30.1 retrieving revision 1.1.30.2 diff -u -d -r1.1.30.1 -r1.1.30.2 --- bgenVariable.py 28 Apr 2003 17:16:37 -0000 1.1.30.1 +++ bgenVariable.py 16 Oct 2005 05:24:06 -0000 1.1.30.2 @@ -13,7 +13,8 @@ SelfMode = 4+InMode # this is 'self' -- don't declare it ReturnMode = 8+OutMode # this is the function return value ErrorMode = 16+OutMode # this is an error status -- turn it into an exception - +RefMode = 32 +ConstMode = 64 class Variable: @@ -39,9 +40,21 @@ If it is "self", it is not declared. 
""" - if self.flags != SelfMode: + if self.flags == ReturnMode+RefMode: + self.type.declare(self.name, reference=True) + elif self.flags != SelfMode: self.type.declare(self.name) + def getArgDeclarations(self, constmode=False): + refmode = (self.flags & RefMode) + if constmode: + constmode = (self.flags & ConstMode) + return self.type.getArgDeclarations(self.name, + reference=refmode, constmode=constmode) + + def getAuxDeclarations(self): + return self.type.getAuxDeclarations(self.name) + def getargsFormat(self): """Call the type's getargsFormatmethod.""" return self.type.getargsFormat() @@ -53,6 +66,9 @@ def getargsCheck(self): return self.type.getargsCheck(self.name) + def getargsPreCheck(self): + return self.type.getargsPreCheck(self.name) + def passArgument(self): """Return the string required to pass the variable as argument. @@ -62,6 +78,8 @@ """ if self.mode == InMode: return self.type.passInput(self.name) + if self.mode & RefMode: + return self.type.passReference(self.name) if self.mode in (OutMode, InOutMode): return self.type.passOutput(self.name) # XXX Shouldn't get here @@ -83,6 +101,9 @@ """Call the type's mkvalueArgs method.""" return self.type.mkvalueArgs(self.name) + def mkvaluePreCheck(self): + return self.type.mkvaluePreCheck(self.name) + def cleanup(self): """Call the type's cleanup method.""" return self.type.cleanup(self.name) Index: scantools.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/bgen/bgen/scantools.py,v retrieving revision 1.28.2.2 retrieving revision 1.28.2.3 diff -u -d -r1.28.2.2 -r1.28.2.3 --- scantools.py 7 Jan 2005 07:05:05 -0000 1.28.2.2 +++ scantools.py 16 Oct 2005 05:24:06 -0000 1.28.2.3 @@ -32,6 +32,76 @@ Error = "scantools.Error" +BEGINHTMLREPORT=""" + + + +Bgen scan report + + +

Bgen scan report

+

Legend

+

This scan report is intended to help you debug the regular expressions +used by the bgen scanner. It consists of the original ".h" header file(s) +marked up to show you what the regular expressions in the bgen parser matched +for each line. NOTE: comments in the original source files may or may not be +shown.

+

The typographic conventions of this file are as follows:

+
+
comment stripping
+
comment stripping is /* marked up */ and the line is repeated if needed
+

If anything here does not appear to happen correctly look at +comment1_pat and comment2_pat.

+
+
constant definitions
+
#define name value
+

Highlights name and value of the constant. Governed by sym_pat.

+
+
function declaration
+
char *rindex(const char *s, int c);
+

Highlights type, name and argument list. type_pat, +name_pat and args_pat are combined into whole_pat, which +is what is used here.

+ +
incomplete match for function declaration
+
char *foo;
+

The beginning of this looked promising, but it did not match a function declaration. +In other words, it matched head_pat but not whole_pat. If the next +declaration has also been gobbled up you need to look at end_pat.

+
+
unrecognized input
+
#include "type.h"
+

If there are function declarations the scanner has missed (i.e. things +are in this class but you want them to be declarations) you need to adapt +head_pat. +

+
+

Output

+
+
+"""
+ENDHTMLREPORT="""
+
+ + +""" + class Scanner: # Set to 1 in subclass to debug your scanner patterns. @@ -232,9 +302,11 @@ self.specmine = 0 self.defsmine = 0 self.scanmine = 0 + self.htmlmine = 0 self.specfile = sys.stdout self.defsfile = None self.scanfile = sys.stdin + self.htmlfile = None self.lineno = 0 self.line = "" @@ -286,6 +358,7 @@ self.closespec() self.closedefs() self.closescan() + self.closehtml() def closespec(self): tmp = self.specmine and self.specfile @@ -302,6 +375,12 @@ self.scanfile = None if tmp: tmp.close() + def closehtml(self): + if self.htmlfile: self.htmlfile.write(ENDHTMLREPORT) + tmp = self.htmlmine and self.htmlfile + self.htmlfile = None + if tmp: tmp.close() + def setoutput(self, spec, defs = None): self.closespec() self.closedefs() @@ -324,6 +403,19 @@ self.defsfile = file self.defsmine = mine + def sethtmloutput(self, htmlfile): + self.closehtml() + if htmlfile: + if type(htmlfile) == StringType: + file = self.openoutput(htmlfile) + mine = 1 + else: + file = htmlfile + mine = 0 + self.htmlfile = file + self.htmlmine = mine + self.htmlfile.write(BEGINHTMLREPORT) + def openoutput(self, filename): try: file = open(filename, 'w') @@ -408,11 +500,17 @@ self.report("LINE: %r" % (line,)) match = self.comment1.match(line) if match: + self.htmlreport(line, klass='commentstripping', ranges=[( + match.start('rest'), match.end('rest'), 'notcomment')]) line = match.group('rest') if self.debug: self.report("\tafter comment1: %r" % (line,)) match = self.comment2.match(line) while match: + if match: + self.htmlreport(line, klass='commentstripping', ranges=[ + (match.start('rest1'), match.end('rest1'), 'notcomment'), + (match.start('rest2'), match.end('rest2'), 'notcomment')]) line = match.group('rest1')+match.group('rest2') if self.debug: self.report("\tafter comment2: %r" % (line,)) @@ -422,7 +520,7 @@ if match: if self.debug: self.report("\tmatches sym.") - self.dosymdef(match) + self.dosymdef(match, line) continue match = self.head.match(line) if match: @@ -430,19 
+528,26 @@ self.report("\tmatches head.") self.dofuncspec() continue + self.htmlreport(line, klass='unmatched') except EOFError: self.error("Uncaught EOF error") self.reportusedtypes() - def dosymdef(self, match): + def dosymdef(self, match, line): name, defn = match.group('name', 'defn') + self.htmlreport(line, klass='constant', ranges=[ + (match.start('name'), match.end('name'), 'name'), + (match.start('defn'), match.end('defn'), 'value')]) defn = escape8bit(defn) if self.debug: self.report("\tsym: name=%r, defn=%r" % (name, defn)) if not name in self.blacklistnames: - self.defsfile.write("%s = %s\n" % (name, defn)) + oline = "%s = %s\n" % (name, defn) + self.defsfile.write(oline) + self.htmlreport(oline, klass="pyconstant") else: self.defsfile.write("# %s = %s\n" % (name, defn)) + self.htmlreport("** no output: name is blacklisted", klass="blconstant") # XXXX No way to handle greylisted names def dofuncspec(self): @@ -466,25 +571,37 @@ if self.debug: self.report("* WHOLE LINE: %r" % (raw,)) self.processrawspec(raw) + return raw def processrawspec(self, raw): match = self.whole.search(raw) if not match: self.report("Bad raw spec: %r", raw) if self.debug: - if not self.type.search(raw): + match = self.type.search(raw) + if not match: self.report("(Type already doesn't match)") + self.htmlreport(raw, klass='incomplete', ranges=[( + match.start('type'), match.end('type'), 'type')]) else: self.report("(but type matched)") + self.htmlreport(raw, klass='incomplete') return type, name, args = match.group('type', 'name', 'args') - type = re.sub("\*", " ptr", type) - type = re.sub("[ \t]+", "_", type) - if name in self.alreadydone: - self.report("Name has already been defined: %r", name) + ranges=[ + (match.start('type'), match.end('type'), 'type'), + (match.start('name'), match.end('name'), 'name'), + (match.start('args'), match.end('args'), 'arglist')] + self.htmlreport(raw, klass='declaration', ranges=ranges) + modifiers = self.getmodifiers(match) + type = 
self.pythonizename(type) + name = self.pythonizename(name) + if self.checkduplicate(name): + self.htmlreport("*** no output generated: duplicate name", klass="blacklisted") return self.report("==> %s %s <==", type, name) if self.blacklisted(type, name): + self.htmlreport("*** no output generated: function name or return type blacklisted", klass="blacklisted") self.report("*** %s %s blacklisted", type, name) return returnlist = [(type, name, 'ReturnMode')] @@ -493,12 +610,31 @@ arglist = self.extractarglist(args) arglist = self.repairarglist(name, arglist) if self.unmanageable(type, name, arglist): + self.htmlreport("*** no output generated: some argument blacklisted", klass="blacklisted") ##for arg in arglist: ## self.report(" %r", arg) self.report("*** %s %s unmanageable", type, name) return + if modifiers: + self.generate(type, name, arglist, modifiers) + else: + self.generate(type, name, arglist) + + def getmodifiers(self, match): + return [] + + def checkduplicate(self, name): + if name in self.alreadydone: + self.report("Name has already been defined: %r", name) + return True self.alreadydone.append(name) - self.generate(type, name, arglist) + return False + + def pythonizename(self, name): + name = re.sub("\*", " ptr", name) + name = name.strip() + name = re.sub("[ \t]+", "_", name) + return name def extractarglist(self, args): args = args.strip() @@ -522,9 +658,7 @@ if array: # array matches an optional [] after the argument name type = type + " ptr " - type = re.sub("\*", " ptr ", type) - type = type.strip() - type = re.sub("[ \t]+", "_", type) + type = self.pythonizename(type) return self.modifyarg(type, name, mode) def modifyarg(self, type, name, mode): @@ -587,23 +721,42 @@ ##self.report("new: %r", new) return new - def generate(self, type, name, arglist): - self.typeused(type, 'return') - classname, listname = self.destination(type, name, arglist) - if not self.specfile: return - self.specfile.write("f = %s(%s, %r,\n" % (classname, type, name)) + def 
generate(self, tp, name, arglist, modifiers=[]): + + self.typeused(tp, 'return') + if modifiers: + classname, listname = self.destination(tp, name, arglist, modifiers) + else: + classname, listname = self.destination(tp, name, arglist) + if not classname or not listname: + self.htmlreport("*** no output generated: self.destination() returned None", klass="blacklisted") + return + if not self.specfile: + self.htmlreport("*** no output generated: no output file specified", klass="blacklisted") + return + self.specfile.write("f = %s(%s, %r,\n" % (classname, tp, name)) for atype, aname, amode in arglist: self.typeused(atype, amode) self.specfile.write(" (%s, %r, %s),\n" % (atype, aname, amode)) if self.greydictnames.has_key(name): self.specfile.write(" condition=%r,\n"%(self.greydictnames[name],)) + self.generatemodifiers(classname, name, modifiers) self.specfile.write(")\n") self.specfile.write("%s.append(f)\n\n" % listname) + if self.htmlfile: + oline = "Adding to %s:\n%s(returntype=%s, name=%r" % (listname, classname, tp, name) + for atype, aname, amode in arglist: + oline += ",\n (%s, %r, %s)" % (atype, aname, amode) + oline += ")\n" + self.htmlreport(oline, klass="pydeclaration") def destination(self, type, name, arglist): return "FunctionGenerator", "functions" + def generatemodifiers(self, classname, name, modifiers): + pass + def blacklisted(self, type, name): if type in self.blacklisttypes: ##self.report("return type %s is blacklisted", type) @@ -620,6 +773,34 @@ return 1 return 0 + def htmlreport(self, line, klass=None, ranges=None): + if not self.htmlfile: return + if ranges is None: + ranges = [] + if klass: + ranges.insert(0, (0, len(line), klass)) + oline = '' + i = 0 + for c in line: + for b, e, name in ranges: + if b == i: + oline += '' % name + if e == i: + oline += '' + i += 1 + + if c == '<': oline += '<' + elif c == '>': oline += '>' + else: oline += c + for b, e, name in ranges: + if b >= i: + oline += '' % name + if e >= i: + oline += '' + if not 
line or line[-1] != '\n': + oline += '\n' + self.htmlfile.write(oline) + class Scanner_PreUH3(Scanner): """Scanner for Universal Headers before release 3""" def initpatterns(self): From jhylton at users.sourceforge.net Sun Oct 16 07:24:11 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:11 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects abstract.c, 2.103.2.2, 2.103.2.3 bufferobject.c, 2.19.2.2, 2.19.2.3 classobject.c, 2.158.2.2, 2.158.2.3 complexobject.c, 2.62.2.2, 2.62.2.3 descrobject.c, 2.27.2.2, 2.27.2.3 dictobject.c, 2.126.2.2, 2.126.2.3 enumobject.c, 1.2.2.2, 1.2.2.3 fileobject.c, 2.164.2.2, 2.164.2.3 floatobject.c, 2.113.2.2, 2.113.2.3 funcobject.c, 2.55.2.4, 2.55.2.5 genobject.c, 1.4.4.1, 1.4.4.2 intobject.c, 2.84.2.2, 2.84.2.3 iterobject.c, 1.10.2.2, 1.10.2.3 listobject.c, 2.114.2.2, 2.114.2.3 listsort.txt, 1.6.10.1, 1.6.10.2 longobject.c, 1.118.2.2, 1.118.2.3 object.c, 2.179.2.3, 2.179.2.4 obmalloc.c, 2.45.2.2, 2.45.2.3 rangeobject.c, 2.41.2.2, 2.41.2.3 setobject.c, 1.31.4.1, 1.31.4.2 sliceobject.c, 2.15.2.2, 2.15.2.3 stringobject.c, 2.168.2.2, 2.168.2.3 tupleobject.c, 2.68.2.2, 2.68.2.3 typeobject.c, 2.157.2.3, 2.157.2.4 unicodeobject.c, 2.155.2.2, 2.155.2.3 weakrefobject.c, 1.9.2.2, 1.9.2.3 Message-ID: <20051016052411.9EA361E4011@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Objects Modified Files: Tag: ast-branch abstract.c bufferobject.c classobject.c complexobject.c descrobject.c dictobject.c enumobject.c fileobject.c floatobject.c funcobject.c genobject.c intobject.c iterobject.c listobject.c listsort.txt longobject.c object.c obmalloc.c rangeobject.c setobject.c sliceobject.c stringobject.c tupleobject.c typeobject.c unicodeobject.c weakrefobject.c Log Message: Merge head to branch (for the last time) Index: abstract.c =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Objects/abstract.c,v retrieving revision 2.103.2.2 retrieving revision 2.103.2.3 diff -u -d -r2.103.2.2 -r2.103.2.3 --- abstract.c 7 Jan 2005 07:03:43 -0000 2.103.2.2 +++ abstract.c 16 Oct 2005 05:24:04 -0000 2.103.2.3 @@ -81,6 +81,31 @@ } #define PyObject_Length PyObject_Size +int +_PyObject_LengthCue(PyObject *o) +{ + int rv = PyObject_Size(o); + if (rv != -1) + return rv; + if (PyErr_ExceptionMatches(PyExc_TypeError) || + PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyObject *err_type, *err_value, *err_tb, *ro; + + PyErr_Fetch(&err_type, &err_value, &err_tb); + ro = PyObject_CallMethod(o, "_length_cue", NULL); + if (ro != NULL) { + rv = (int)PyInt_AsLong(ro); + Py_DECREF(ro); + Py_XDECREF(err_type); + Py_XDECREF(err_value); + Py_XDECREF(err_tb); + return rv; + } + PyErr_Restore(err_type, err_value, err_tb); + } + return -1; +} + PyObject * PyObject_GetItem(PyObject *o, PyObject *key) { @@ -951,7 +976,19 @@ Py_INCREF(o); return o; } - if (PyInt_Check(o)) { + m = o->ob_type->tp_as_number; + if (m && m->nb_int) { /* This should include subclasses of int */ + PyObject *res = m->nb_int(o); + if (res && (!PyInt_Check(res) && !PyLong_Check(res))) { + PyErr_Format(PyExc_TypeError, + "__int__ returned non-int (type %.200s)", + res->ob_type->tp_name); + Py_DECREF(res); + return NULL; + } + return res; + } + if (PyInt_Check(o)) { /* A int subclass without nb_int */ PyIntObject *io = (PyIntObject*)o; return PyInt_FromLong(io->ob_ival); } @@ -964,18 +1001,6 @@ PyUnicode_GET_SIZE(o), 10); #endif - m = o->ob_type->tp_as_number; - if (m && m->nb_int) { - PyObject *res = m->nb_int(o); - if (res && (!PyInt_Check(res) && !PyLong_Check(res))) { - PyErr_Format(PyExc_TypeError, - "__int__ returned non-int (type %.200s)", - res->ob_type->tp_name); - Py_DECREF(res); - return NULL; - } - return res; - } if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) return int_from_string((char*)buffer, buffer_len); @@ -1010,11 +1035,19 @@ if (o == NULL) return 
null_error(); - if (PyLong_CheckExact(o)) { - Py_INCREF(o); - return o; + m = o->ob_type->tp_as_number; + if (m && m->nb_long) { /* This should include subclasses of long */ + PyObject *res = m->nb_long(o); + if (res && (!PyInt_Check(res) && !PyLong_Check(res))) { + PyErr_Format(PyExc_TypeError, + "__long__ returned non-long (type %.200s)", + res->ob_type->tp_name); + Py_DECREF(res); + return NULL; + } + return res; } - if (PyLong_Check(o)) + if (PyLong_Check(o)) /* A long subclass without nb_long */ return _PyLong_Copy((PyLongObject *)o); if (PyString_Check(o)) /* need to do extra error checking that PyLong_FromString() @@ -1030,18 +1063,6 @@ PyUnicode_GET_SIZE(o), 10); #endif - m = o->ob_type->tp_as_number; - if (m && m->nb_long) { - PyObject *res = m->nb_long(o); - if (res && (!PyInt_Check(res) && !PyLong_Check(res))) { - PyErr_Format(PyExc_TypeError, - "__long__ returned non-long (type %.200s)", - res->ob_type->tp_name); - Py_DECREF(res); - return NULL; - } - return res; - } if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) return long_from_string(buffer, buffer_len); @@ -1055,28 +1076,22 @@ if (o == NULL) return null_error(); - if (PyFloat_CheckExact(o)) { - Py_INCREF(o); - return o; + m = o->ob_type->tp_as_number; + if (m && m->nb_float) { /* This should include subclasses of float */ + PyObject *res = m->nb_float(o); + if (res && !PyFloat_Check(res)) { + PyErr_Format(PyExc_TypeError, + "__float__ returned non-float (type %.200s)", + res->ob_type->tp_name); + Py_DECREF(res); + return NULL; + } + return res; } - if (PyFloat_Check(o)) { + if (PyFloat_Check(o)) { /* A float subclass with nb_float == NULL */ PyFloatObject *po = (PyFloatObject *)o; return PyFloat_FromDouble(po->ob_fval); } - if (!PyString_Check(o)) { - m = o->ob_type->tp_as_number; - if (m && m->nb_float) { - PyObject *res = m->nb_float(o); - if (res && !PyFloat_Check(res)) { - PyErr_Format(PyExc_TypeError, - "__float__ returned non-float (type %.200s)", - res->ob_type->tp_name); - 
Py_DECREF(res); - return NULL; - } - return res; - } - } return PyFloat_FromString(o, NULL); } @@ -1409,8 +1424,13 @@ return NULL; /* Guess result size and allocate space. */ - n = PyObject_Size(v); + n = _PyObject_LengthCue(v); if (n < 0) { + if (!PyErr_ExceptionMatches(PyExc_TypeError) && + !PyErr_ExceptionMatches(PyExc_AttributeError)) { + Py_DECREF(it); + return NULL; + } PyErr_Clear(); n = 10; /* arbitrary */ } @@ -1807,7 +1827,9 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...) { va_list va; - PyObject *args, *func = 0, *retval; + PyObject *args = NULL; + PyObject *func = NULL; + PyObject *retval = NULL; if (o == NULL || name == NULL) return null_error(); @@ -1818,8 +1840,10 @@ return 0; } - if (!PyCallable_Check(func)) - return type_error("call of non-callable attribute"); + if (!PyCallable_Check(func)) { + type_error("call of non-callable attribute"); + goto exit; + } if (format && *format) { va_start(va, format); @@ -1830,23 +1854,24 @@ args = PyTuple_New(0); if (!args) - return NULL; + goto exit; if (!PyTuple_Check(args)) { PyObject *a; a = PyTuple_New(1); if (a == NULL) - return NULL; + goto exit; if (PyTuple_SetItem(a, 0, args) < 0) - return NULL; + goto exit; args = a; } retval = PyObject_Call(func, args, NULL); - Py_DECREF(args); - Py_DECREF(func); + exit: + Py_XDECREF(args); + Py_XDECREF(func); return retval; } Index: bufferobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/bufferobject.c,v retrieving revision 2.19.2.2 retrieving revision 2.19.2.3 diff -u -d -r2.19.2.2 -r2.19.2.3 --- bufferobject.c 7 Jan 2005 07:03:44 -0000 2.19.2.2 +++ bufferobject.c 16 Oct 2005 05:24:04 -0000 2.19.2.3 @@ -192,7 +192,10 @@ int offset = 0; int size = Py_END_OF_BUFFER; - if ( !PyArg_ParseTuple(args, "O|ii:buffer", &ob, &offset, &size) ) + if (!_PyArg_NoKeywords("buffer()", kw)) + return NULL; + + if (!PyArg_ParseTuple(args, "O|ii:buffer", &ob, &offset, &size)) return NULL; 
return PyBuffer_FromObject(ob, offset, size); } Index: classobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/classobject.c,v retrieving revision 2.158.2.2 retrieving revision 2.158.2.3 diff -u -d -r2.158.2.2 -r2.158.2.3 --- classobject.c 7 Jan 2005 07:03:44 -0000 2.158.2.2 +++ classobject.c 16 Oct 2005 05:24:04 -0000 2.158.2.3 @@ -1013,7 +1013,17 @@ if (res == NULL) return -1; if (PyInt_Check(res)) { - outcome = PyInt_AsLong(res); + long temp = PyInt_AsLong(res); + outcome = (int)temp; +#if SIZEOF_INT < SIZEOF_LONG + /* Overflow check -- range of PyInt is more than C int */ + if (outcome != temp) { + PyErr_SetString(PyExc_OverflowError, + "__len__() should return 0 <= outcome < 2**31"); + outcome = -1; + } + else +#endif if (outcome < 0) PyErr_SetString(PyExc_ValueError, "__len__() should return >= 0"); @@ -2208,6 +2218,12 @@ } if (self == Py_None) self = NULL; + if (self == NULL && classObj == NULL) { + PyErr_SetString(PyExc_TypeError, + "unbound methods must have non-NULL im_class"); + return NULL; + } + return PyMethod_New(func, self, classObj); } @@ -2480,7 +2496,7 @@ (getattrofunc)instancemethod_getattro, /* tp_getattro */ PyObject_GenericSetAttr, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */ instancemethod_doc, /* tp_doc */ (traverseproc)instancemethod_traverse, /* tp_traverse */ 0, /* tp_clear */ Index: complexobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/complexobject.c,v retrieving revision 2.62.2.2 retrieving revision 2.62.2.3 diff -u -d -r2.62.2.2 -r2.62.2.3 --- complexobject.c 7 Jan 2005 07:03:44 -0000 2.62.2.2 +++ complexobject.c 16 Oct 2005 05:24:04 -0000 2.62.2.3 @@ -279,15 +279,12 @@ strncat(buf, "j", bufsz); } else { char re[64], im[64]; - char *fmt; + /* 
Format imaginary part with sign, real part without */ PyOS_snprintf(format, 32, "%%.%ig", precision); PyOS_ascii_formatd(re, 64, format, v->cval.real); + PyOS_snprintf(format, 32, "%%+.%ig", precision); PyOS_ascii_formatd(im, 64, format, v->cval.imag); - if (v->cval.imag < 0.) - fmt = "(%s%sj)"; - else - fmt = "(%s+%sj)"; - PyOS_snprintf(buf, bufsz, fmt, re, im); + PyOS_snprintf(buf, bufsz, "(%s%sj)", re, im); } } Index: descrobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/descrobject.c,v retrieving revision 2.27.2.2 retrieving revision 2.27.2.3 diff -u -d -r2.27.2.2 -r2.27.2.3 --- descrobject.c 7 Jan 2005 07:03:45 -0000 2.27.2.2 +++ descrobject.c 16 Oct 2005 05:24:04 -0000 2.27.2.3 @@ -144,7 +144,7 @@ return res; if (descr->d_getset->get != NULL) return descr->d_getset->get(obj, descr->d_getset->closure); - PyErr_Format(PyExc_TypeError, + PyErr_Format(PyExc_AttributeError, "attribute '%.300s' of '%.100s' objects is not readable", descr_name((PyDescrObject *)descr), descr->d_type->tp_name); @@ -199,7 +199,7 @@ if (descr->d_getset->set != NULL) return descr->d_getset->set(obj, value, descr->d_getset->closure); - PyErr_Format(PyExc_TypeError, + PyErr_Format(PyExc_AttributeError, "attribute '%.300s' of '%.100s' objects is not writable", descr_name((PyDescrObject *)descr), descr->d_type->tp_name); Index: dictobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v retrieving revision 2.126.2.2 retrieving revision 2.126.2.3 diff -u -d -r2.126.2.2 -r2.126.2.3 --- dictobject.c 7 Jan 2005 07:03:45 -0000 2.126.2.2 +++ dictobject.c 16 Oct 2005 05:24:04 -0000 2.126.2.3 @@ -113,7 +113,7 @@ */ /* Object used as dummy key to fill deleted entries */ -static PyObject *dummy; /* Initialized by first call to newdictobject() */ +static PyObject *dummy = NULL; /* Initialized by first call to newdictobject() */ /* forward 
declarations */ static dictentry * @@ -400,8 +400,10 @@ else { if (ep->me_key == NULL) mp->ma_fill++; - else - Py_DECREF(ep->me_key); + else { + assert(ep->me_key == dummy); + Py_DECREF(dummy); + } ep->me_key = key; ep->me_hash = hash; ep->me_value = value; @@ -565,7 +567,7 @@ */ if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2)) return 0; - return dictresize(mp, mp->ma_used*(mp->ma_used>50000 ? 2 : 4)); + return dictresize(mp, (mp->ma_used>50000 ? mp->ma_used*2 : mp->ma_used*4)); } int @@ -1201,6 +1203,12 @@ if (other == mp || other->ma_used == 0) /* a.update(a) or a.update({}); nothing to do */ return 0; + if (mp->ma_used == 0) + /* Since the target dict is empty, PyDict_GetItem() + * always returns NULL. Setting override to 1 + * skips the unnecessary test. + */ + override = 1; /* Do one big resize at the start, rather than * incrementally resizing as we insert new items. Expect * that there will be no (or few) overlapping keys. @@ -1289,7 +1297,7 @@ if (PyDict_Merge(copy, o, 1) == 0) return copy; Py_DECREF(copy); - return copy; + return NULL; } int @@ -2046,17 +2054,20 @@ PyObject_Del(di); } -static int +static PyObject * dictiter_len(dictiterobject *di) { + int len = 0; if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used) - return di->len; - return 0; + len = di->len; + return PyInt_FromLong(len); } -static PySequenceMethods dictiter_as_sequence = { - (inquiry)dictiter_len, /* sq_length */ - 0, /* sq_concat */ +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef dictiter_methods[] = { + {"_length_cue", (PyCFunction)dictiter_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ }; static PyObject *dictiter_iternextkey(dictiterobject *di) @@ -2112,7 +2123,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &dictiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -2128,6 +2139,8 @@ 
0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)dictiter_iternextkey, /* tp_iternext */ + dictiter_methods, /* tp_methods */ + 0, }; static PyObject *dictiter_iternextvalue(dictiterobject *di) @@ -2183,7 +2196,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &dictiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -2199,6 +2212,8 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)dictiter_iternextvalue, /* tp_iternext */ + dictiter_methods, /* tp_methods */ + 0, }; static PyObject *dictiter_iternextitem(dictiterobject *di) @@ -2268,7 +2283,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &dictiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -2284,4 +2299,6 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)dictiter_iternextitem, /* tp_iternext */ + dictiter_methods, /* tp_methods */ + 0, }; Index: enumobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/enumobject.c,v retrieving revision 1.2.2.2 retrieving revision 1.2.2.3 diff -u -d -r1.2.2.2 -r1.2.2.3 --- enumobject.c 7 Jan 2005 07:04:00 -0000 1.2.2.2 +++ enumobject.c 16 Oct 2005 05:24:04 -0000 1.2.2.3 @@ -239,23 +239,25 @@ "\n" "Return a reverse iterator"); -static int +static PyObject * reversed_len(reversedobject *ro) { int position, seqsize; if (ro->seq == NULL) - return 0; + return PyInt_FromLong(0); seqsize = PySequence_Size(ro->seq); if (seqsize == -1) - return -1; + return NULL; position = ro->index + 1; - return (seqsize < position) ? 0 : position; + return PyInt_FromLong((seqsize < position) ? 
0 : position); } -static PySequenceMethods reversed_as_sequence = { - (inquiry)reversed_len, /* sq_length */ - 0, /* sq_concat */ +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef reversediter_methods[] = { + {"_length_cue", (PyCFunction)reversed_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ }; PyTypeObject PyReversed_Type = { @@ -272,7 +274,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &reversed_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -289,7 +291,7 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)reversed_next, /* tp_iternext */ - 0, /* tp_methods */ + reversediter_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ Index: fileobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v retrieving revision 2.164.2.2 retrieving revision 2.164.2.3 diff -u -d -r2.164.2.2 -r2.164.2.3 --- fileobject.c 7 Jan 2005 07:04:01 -0000 2.164.2.2 +++ fileobject.c 16 Oct 2005 05:24:04 -0000 2.164.2.3 @@ -128,6 +128,54 @@ return (PyObject *) f; } +/* check for known incorrect mode strings - problem is, platforms are + free to accept any mode characters they like and are supposed to + ignore stuff they don't understand... write or append mode with + universal newline support is expressly forbidden by PEP 278. 
*/ +/* zero return is kewl - one is un-kewl */ +static int +check_the_mode(char *mode) +{ + unsigned int len = strlen(mode); + + switch (len) { + case 0: + PyErr_SetString(PyExc_ValueError, "empty mode string"); + return 1; + + /* reject wU, aU */ + case 2: + switch (mode[0]) { + case 'w': + case 'a': + if (mode[1] == 'U') { + PyErr_SetString(PyExc_ValueError, + "invalid mode string"); + return 1; + } + break; + } + break; + + /* reject w+U, a+U, wU+, aU+ */ + case 3: + switch (mode[0]) { + case 'w': + case 'a': + if ((mode[1] == '+' && mode[2] == 'U') || + (mode[1] == 'U' && mode[2] == '+')) { + PyErr_SetString(PyExc_ValueError, + "invalid mode string"); + return 1; + } + break; + } + break; + } + + return 0; +} + static PyObject * open_the_file(PyFileObject *f, char *name, char *mode) { @@ -142,6 +190,9 @@ assert(mode != NULL); assert(f->f_fp == NULL); + if (check_the_mode(mode)) + return NULL; + /* rexec.py can't stop a user from getting the file() constructor -- all they have to do is get *any* file object f, and then do type(f). Here we prevent them from doing damage with it. 
*/ Index: floatobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/floatobject.c,v retrieving revision 2.113.2.2 retrieving revision 2.113.2.3 diff -u -d -r2.113.2.2 -r2.113.2.3 --- floatobject.c 7 Jan 2005 07:04:01 -0000 2.113.2.2 +++ floatobject.c 16 Oct 2005 05:24:04 -0000 2.113.2.3 @@ -926,7 +926,10 @@ static PyObject * float_float(PyObject *v) { - Py_INCREF(v); + if (PyFloat_CheckExact(v)) + Py_INCREF(v); + else + v = PyFloat_FromDouble(((PyFloatObject *)v)->ob_fval); return v; } @@ -980,8 +983,139 @@ return Py_BuildValue("(d)", v->ob_fval); } +/* this is for the benefit of the pack/unpack routines below */ + +typedef enum { + unknown_format, ieee_big_endian_format, ieee_little_endian_format +} float_format_type; + +static float_format_type double_format, float_format; +static float_format_type detected_double_format, detected_float_format; + +static PyObject * +float_getformat(PyTypeObject *v, PyObject* arg) +{ + char* s; + float_format_type r; + + if (!PyString_Check(arg)) { + PyErr_Format(PyExc_TypeError, + "__getformat__() argument must be string, not %.500s", + arg->ob_type->tp_name); + return NULL; + } + s = PyString_AS_STRING(arg); + if (strcmp(s, "double") == 0) { + r = double_format; + } + else if (strcmp(s, "float") == 0) { + r = float_format; + } + else { + PyErr_SetString(PyExc_ValueError, + "__getformat__() argument 1 must be " + "'double' or 'float'"); + return NULL; + } + + switch (r) { + case unknown_format: + return PyString_FromString("unknown"); + case ieee_little_endian_format: + return PyString_FromString("IEEE, little-endian"); + case ieee_big_endian_format: + return PyString_FromString("IEEE, big-endian"); + default: + Py_FatalError("insane float_format or double_format"); + return NULL; + } +} + +PyDoc_STRVAR(float_getformat_doc, +"float.__getformat__(typestr) -> string\n" +"\n" +"You probably don't want to use this function. 
It exists mainly to be\n" +"used in Python's test suite.\n" +"\n" +"typestr must be 'double' or 'float'. This function returns whichever of\n" +"'unknown', 'IEEE, big-endian' or 'IEEE, little-endian' best describes the\n" +"format of floating point numbers used by the C type named by typestr."); + +static PyObject * +float_setformat(PyTypeObject *v, PyObject* args) +{ + char* typestr; + char* format; + float_format_type f; + float_format_type detected; + float_format_type *p; + + if (!PyArg_ParseTuple(args, "ss:__setformat__", &typestr, &format)) + return NULL; + + if (strcmp(typestr, "double") == 0) { + p = &double_format; + detected = detected_double_format; + } + else if (strcmp(typestr, "float") == 0) { + p = &float_format; + detected = detected_float_format; + } + else { + PyErr_SetString(PyExc_ValueError, + "__setformat__() argument 1 must " + "be 'double' or 'float'"); + return NULL; + } + + if (strcmp(format, "unknown") == 0) { + f = unknown_format; + } + else if (strcmp(format, "IEEE, little-endian") == 0) { + f = ieee_little_endian_format; + } + else if (strcmp(format, "IEEE, big-endian") == 0) { + f = ieee_big_endian_format; + } + else { + PyErr_SetString(PyExc_ValueError, + "__setformat__() argument 2 must be " + "'unknown', 'IEEE, little-endian' or " + "'IEEE, big-endian'"); + return NULL; + + } + + if (f != unknown_format && f != detected) { + PyErr_Format(PyExc_ValueError, + "can only set %s format to 'unknown' or the " + "detected platform value", typestr); + return NULL; + } + + *p = f; + Py_RETURN_NONE; +} + +PyDoc_STRVAR(float_setformat_doc, +"float.__setformat__(typestr, fmt) -> None\n" +"\n" +"You probably don't want to use this function. It exists mainly to be\n" +"used in Python's test suite.\n" +"\n" +"typestr must be 'double' or 'float'. 
fmt must be one of 'unknown',\n" +"'IEEE, big-endian' or 'IEEE, little-endian', and in addition can only be\n" +"one of the latter two if it appears to match the underlying C reality.\n" +"\n" +"Overrides the automatic determination of C-level floating point type.\n" +"This affects how floats are converted to and from binary strings."); + static PyMethodDef float_methods[] = { {"__getnewargs__", (PyCFunction)float_getnewargs, METH_NOARGS}, + {"__getformat__", (PyCFunction)float_getformat, + METH_O|METH_CLASS, float_getformat_doc}, + {"__setformat__", (PyCFunction)float_setformat, + METH_VARARGS|METH_CLASS, float_setformat_doc}, {NULL, NULL} /* sentinel */ }; @@ -1076,6 +1210,56 @@ }; void +_PyFloat_Init(void) +{ + /* We attempt to determine if this machine is using IEEE + floating point formats by peering at the bits of some + carefully chosen values. If it looks like we are on an + IEEE platform, the float packing/unpacking routines can + just copy bits, if not they resort to arithmetic & shifts + and masks. The shifts & masks approach works on all finite + values, but what happens to infinities, NaNs and signed + zeroes on packing is an accident, and attempting to unpack + a NaN or an infinity will raise an exception. + + Note that if we're on some whacked-out platform which uses + IEEE formats but isn't strictly little-endian or big- + endian, we will fall back to the portable shifts & masks + method. 
*/ + +#if SIZEOF_DOUBLE == 8 + { + double x = 9006104071832581.0; + if (memcmp(&x, "\x43\x3f\xff\x01\x02\x03\x04\x05", 8) == 0) + detected_double_format = ieee_big_endian_format; + else if (memcmp(&x, "\x05\x04\x03\x02\x01\xff\x3f\x43", 8) == 0) + detected_double_format = ieee_little_endian_format; + else + detected_double_format = unknown_format; + } +#else + detected_double_format = unknown_format; +#endif + +#if SIZEOF_FLOAT == 4 + { + float y = 16711938.0; + if (memcmp(&y, "\x4b\x7f\x01\x02", 4) == 0) + detected_float_format = ieee_big_endian_format; + else if (memcmp(&y, "\x02\x01\x7f\x4b", 4) == 0) + detected_float_format = ieee_little_endian_format; + else + detected_float_format = unknown_format; + } +#else + detected_float_format = unknown_format; +#endif + + double_format = detected_double_format; + float_format = detected_float_format; +} + +void PyFloat_Fini(void) { PyFloatObject *p; @@ -1162,306 +1346,395 @@ int _PyFloat_Pack4(double x, unsigned char *p, int le) { - unsigned char sign; - int e; - double f; - unsigned int fbits; - int incr = 1; - - if (le) { - p += 3; - incr = -1; - } + if (float_format == unknown_format) { + unsigned char sign; + int e; + double f; + unsigned int fbits; + int incr = 1; - if (x < 0) { - sign = 1; - x = -x; - } - else - sign = 0; + if (le) { + p += 3; + incr = -1; + } - f = frexp(x, &e); + if (x < 0) { + sign = 1; + x = -x; + } + else + sign = 0; - /* Normalize f to be in the range [1.0, 2.0) */ - if (0.5 <= f && f < 1.0) { - f *= 2.0; - e--; - } - else if (f == 0.0) - e = 0; - else { - PyErr_SetString(PyExc_SystemError, - "frexp() result out of range"); - return -1; - } + f = frexp(x, &e); - if (e >= 128) - goto Overflow; - else if (e < -126) { - /* Gradual underflow */ - f = ldexp(f, 126 + e); - e = 0; - } - else if (!(e == 0 && f == 0.0)) { - e += 127; - f -= 1.0; /* Get rid of leading 1 */ - } + /* Normalize f to be in the range [1.0, 2.0) */ + if (0.5 <= f && f < 1.0) { + f *= 2.0; + e--; + } + else if (f == 0.0) + 
e = 0; + else { + PyErr_SetString(PyExc_SystemError, + "frexp() result out of range"); + return -1; + } - f *= 8388608.0; /* 2**23 */ - fbits = (unsigned int)(f + 0.5); /* Round */ - assert(fbits <= 8388608); - if (fbits >> 23) { - /* The carry propagated out of a string of 23 1 bits. */ - fbits = 0; - ++e; - if (e >= 255) + if (e >= 128) goto Overflow; - } + else if (e < -126) { + /* Gradual underflow */ + f = ldexp(f, 126 + e); + e = 0; + } + else if (!(e == 0 && f == 0.0)) { + e += 127; + f -= 1.0; /* Get rid of leading 1 */ + } - /* First byte */ - *p = (sign << 7) | (e >> 1); - p += incr; + f *= 8388608.0; /* 2**23 */ + fbits = (unsigned int)(f + 0.5); /* Round */ + assert(fbits <= 8388608); + if (fbits >> 23) { + /* The carry propagated out of a string of 23 1 bits. */ + fbits = 0; + ++e; + if (e >= 255) + goto Overflow; + } - /* Second byte */ - *p = (char) (((e & 1) << 7) | (fbits >> 16)); - p += incr; + /* First byte */ + *p = (sign << 7) | (e >> 1); + p += incr; - /* Third byte */ - *p = (fbits >> 8) & 0xFF; - p += incr; + /* Second byte */ + *p = (char) (((e & 1) << 7) | (fbits >> 16)); + p += incr; - /* Fourth byte */ - *p = fbits & 0xFF; + /* Third byte */ + *p = (fbits >> 8) & 0xFF; + p += incr; - /* Done */ - return 0; + /* Fourth byte */ + *p = fbits & 0xFF; - Overflow: - PyErr_SetString(PyExc_OverflowError, - "float too large to pack with f format"); - return -1; + /* Done */ + return 0; + + Overflow: + PyErr_SetString(PyExc_OverflowError, + "float too large to pack with f format"); + return -1; + } + else { + float y = (float)x; + const char *s = (char*)&y; + int i, incr = 1; + + if ((float_format == ieee_little_endian_format && !le) + || (float_format == ieee_big_endian_format && le)) { + p += 3; + incr = -1; + } + + for (i = 0; i < 4; i++) { + *p = *s++; + p += incr; + } + return 0; + } } int _PyFloat_Pack8(double x, unsigned char *p, int le) { - unsigned char sign; - int e; - double f; - unsigned int fhi, flo; - int incr = 1; + if 
(double_format == unknown_format) { + unsigned char sign; + int e; + double f; + unsigned int fhi, flo; + int incr = 1; - if (le) { - p += 7; - incr = -1; - } + if (le) { + p += 7; + incr = -1; + } - if (x < 0) { - sign = 1; - x = -x; - } - else - sign = 0; + if (x < 0) { + sign = 1; + x = -x; + } + else + sign = 0; - f = frexp(x, &e); + f = frexp(x, &e); - /* Normalize f to be in the range [1.0, 2.0) */ - if (0.5 <= f && f < 1.0) { - f *= 2.0; - e--; - } - else if (f == 0.0) - e = 0; - else { - PyErr_SetString(PyExc_SystemError, - "frexp() result out of range"); - return -1; - } + /* Normalize f to be in the range [1.0, 2.0) */ + if (0.5 <= f && f < 1.0) { + f *= 2.0; + e--; + } + else if (f == 0.0) + e = 0; + else { + PyErr_SetString(PyExc_SystemError, + "frexp() result out of range"); + return -1; + } - if (e >= 1024) - goto Overflow; - else if (e < -1022) { - /* Gradual underflow */ - f = ldexp(f, 1022 + e); - e = 0; - } - else if (!(e == 0 && f == 0.0)) { - e += 1023; - f -= 1.0; /* Get rid of leading 1 */ - } + if (e >= 1024) + goto Overflow; + else if (e < -1022) { + /* Gradual underflow */ + f = ldexp(f, 1022 + e); + e = 0; + } + else if (!(e == 0 && f == 0.0)) { + e += 1023; + f -= 1.0; /* Get rid of leading 1 */ + } - /* fhi receives the high 28 bits; flo the low 24 bits (== 52 bits) */ - f *= 268435456.0; /* 2**28 */ - fhi = (unsigned int)f; /* Truncate */ - assert(fhi < 268435456); + /* fhi receives the high 28 bits; flo the low 24 bits (== 52 bits) */ + f *= 268435456.0; /* 2**28 */ + fhi = (unsigned int)f; /* Truncate */ + assert(fhi < 268435456); - f -= (double)fhi; - f *= 16777216.0; /* 2**24 */ - flo = (unsigned int)(f + 0.5); /* Round */ - assert(flo <= 16777216); - if (flo >> 24) { - /* The carry propagated out of a string of 24 1 bits. */ - flo = 0; - ++fhi; - if (fhi >> 28) { - /* And it also progagated out of the next 28 bits. 
*/ - fhi = 0; - ++e; - if (e >= 2047) - goto Overflow; + f -= (double)fhi; + f *= 16777216.0; /* 2**24 */ + flo = (unsigned int)(f + 0.5); /* Round */ + assert(flo <= 16777216); + if (flo >> 24) { + /* The carry propagated out of a string of 24 1 bits. */ + flo = 0; + ++fhi; + if (fhi >> 28) { + /* And it also progagated out of the next 28 bits. */ + fhi = 0; + ++e; + if (e >= 2047) + goto Overflow; + } } - } - /* First byte */ - *p = (sign << 7) | (e >> 4); - p += incr; + /* First byte */ + *p = (sign << 7) | (e >> 4); + p += incr; - /* Second byte */ - *p = (unsigned char) (((e & 0xF) << 4) | (fhi >> 24)); - p += incr; + /* Second byte */ + *p = (unsigned char) (((e & 0xF) << 4) | (fhi >> 24)); + p += incr; - /* Third byte */ - *p = (fhi >> 16) & 0xFF; - p += incr; + /* Third byte */ + *p = (fhi >> 16) & 0xFF; + p += incr; - /* Fourth byte */ - *p = (fhi >> 8) & 0xFF; - p += incr; + /* Fourth byte */ + *p = (fhi >> 8) & 0xFF; + p += incr; - /* Fifth byte */ - *p = fhi & 0xFF; - p += incr; + /* Fifth byte */ + *p = fhi & 0xFF; + p += incr; - /* Sixth byte */ - *p = (flo >> 16) & 0xFF; - p += incr; + /* Sixth byte */ + *p = (flo >> 16) & 0xFF; + p += incr; - /* Seventh byte */ - *p = (flo >> 8) & 0xFF; - p += incr; + /* Seventh byte */ + *p = (flo >> 8) & 0xFF; + p += incr; - /* Eighth byte */ - *p = flo & 0xFF; - p += incr; + /* Eighth byte */ + *p = flo & 0xFF; + p += incr; - /* Done */ - return 0; + /* Done */ + return 0; - Overflow: - PyErr_SetString(PyExc_OverflowError, - "float too large to pack with d format"); - return -1; + Overflow: + PyErr_SetString(PyExc_OverflowError, + "float too large to pack with d format"); + return -1; + } + else { + const char *s = (char*)&x; + int i, incr = 1; + + if ((double_format == ieee_little_endian_format && !le) + || (double_format == ieee_big_endian_format && le)) { + p += 7; + incr = -1; + } + + for (i = 0; i < 8; i++) { + *p = *s++; + p += incr; + } + return 0; + } } double _PyFloat_Unpack4(const unsigned char *p, int 
le) { - unsigned char sign; - int e; - unsigned int f; - double x; - int incr = 1; + if (float_format == unknown_format) { + unsigned char sign; + int e; + unsigned int f; + double x; + int incr = 1; - if (le) { - p += 3; - incr = -1; - } + if (le) { + p += 3; + incr = -1; + } - /* First byte */ - sign = (*p >> 7) & 1; - e = (*p & 0x7F) << 1; - p += incr; + /* First byte */ + sign = (*p >> 7) & 1; + e = (*p & 0x7F) << 1; + p += incr; - /* Second byte */ - e |= (*p >> 7) & 1; - f = (*p & 0x7F) << 16; - p += incr; + /* Second byte */ + e |= (*p >> 7) & 1; + f = (*p & 0x7F) << 16; + p += incr; - /* Third byte */ - f |= *p << 8; - p += incr; + if (e == 255) { + PyErr_SetString( + PyExc_ValueError, + "can't unpack IEEE 754 special value " + "on non-IEEE platform"); + return -1; + } - /* Fourth byte */ - f |= *p; + /* Third byte */ + f |= *p << 8; + p += incr; - x = (double)f / 8388608.0; + /* Fourth byte */ + f |= *p; - /* XXX This sadly ignores Inf/NaN issues */ - if (e == 0) - e = -126; - else { - x += 1.0; - e -= 127; - } - x = ldexp(x, e); + x = (double)f / 8388608.0; - if (sign) - x = -x; + /* XXX This sadly ignores Inf/NaN issues */ + if (e == 0) + e = -126; + else { + x += 1.0; + e -= 127; + } + x = ldexp(x, e); - return x; + if (sign) + x = -x; + + return x; + } + else { + if ((float_format == ieee_little_endian_format && !le) + || (float_format == ieee_big_endian_format && le)) { + char buf[8]; + char *d = &buf[3]; + int i; + + for (i = 0; i < 4; i++) { + *d-- = *p++; + } + return *(float*)&buf[0]; + } + else { + return *(float*)p; + } + } } double _PyFloat_Unpack8(const unsigned char *p, int le) { - unsigned char sign; - int e; - unsigned int fhi, flo; - double x; - int incr = 1; + if (double_format == unknown_format) { + unsigned char sign; + int e; + unsigned int fhi, flo; + double x; + int incr = 1; - if (le) { - p += 7; - incr = -1; - } + if (le) { + p += 7; + incr = -1; + } - /* First byte */ - sign = (*p >> 7) & 1; - e = (*p & 0x7F) << 4; - p += incr; + 
/* First byte */ + sign = (*p >> 7) & 1; + e = (*p & 0x7F) << 4; + + p += incr; - /* Second byte */ - e |= (*p >> 4) & 0xF; - fhi = (*p & 0xF) << 24; - p += incr; + /* Second byte */ + e |= (*p >> 4) & 0xF; + fhi = (*p & 0xF) << 24; + p += incr; - /* Third byte */ - fhi |= *p << 16; - p += incr; + if (e == 2047) { + PyErr_SetString( + PyExc_ValueError, + "can't unpack IEEE 754 special value " + "on non-IEEE platform"); + return -1.0; + } - /* Fourth byte */ - fhi |= *p << 8; - p += incr; + /* Third byte */ + fhi |= *p << 16; + p += incr; - /* Fifth byte */ - fhi |= *p; - p += incr; + /* Fourth byte */ + fhi |= *p << 8; + p += incr; - /* Sixth byte */ - flo = *p << 16; - p += incr; + /* Fifth byte */ + fhi |= *p; + p += incr; - /* Seventh byte */ - flo |= *p << 8; - p += incr; + /* Sixth byte */ + flo = *p << 16; + p += incr; - /* Eighth byte */ - flo |= *p; + /* Seventh byte */ + flo |= *p << 8; + p += incr; - x = (double)fhi + (double)flo / 16777216.0; /* 2**24 */ - x /= 268435456.0; /* 2**28 */ + /* Eighth byte */ + flo |= *p; - /* XXX This sadly ignores Inf/NaN */ - if (e == 0) - e = -1022; - else { - x += 1.0; - e -= 1023; - } - x = ldexp(x, e); + x = (double)fhi + (double)flo / 16777216.0; /* 2**24 */ + x /= 268435456.0; /* 2**28 */ - if (sign) - x = -x; + if (e == 0) + e = -1022; + else { + x += 1.0; + e -= 1023; + } + x = ldexp(x, e); - return x; + if (sign) + x = -x; + + return x; + } + else { + if ((double_format == ieee_little_endian_format && !le) + || (double_format == ieee_big_endian_format && le)) { + char buf[8]; + char *d = &buf[7]; + int i; + + for (i = 0; i < 8; i++) { + *d-- = *p++; + } + return *(double*)&buf[0]; + } + else { + return *(double*)p; + } + } } Index: funcobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/funcobject.c,v retrieving revision 2.55.2.4 retrieving revision 2.55.2.5 diff -u -d -r2.55.2.4 -r2.55.2.5 --- funcobject.c 14 Oct 2005 20:09:47 -0000 
2.55.2.4 +++ funcobject.c 16 Oct 2005 05:24:04 -0000 2.55.2.5 @@ -264,8 +264,6 @@ static PyObject * func_get_name(PyFunctionObject *op) { - if (restricted()) - return NULL; Py_INCREF(op->func_name); return op->func_name; } Index: genobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/genobject.c,v retrieving revision 1.4.4.1 retrieving revision 1.4.4.2 diff -u -d -r1.4.4.1 -r1.4.4.2 --- genobject.c 7 Jan 2005 07:04:02 -0000 1.4.4.1 +++ genobject.c 16 Oct 2005 05:24:04 -0000 1.4.4.2 @@ -15,15 +15,31 @@ static void gen_dealloc(PyGenObject *gen) { + PyObject *self = (PyObject *) gen; + _PyObject_GC_UNTRACK(gen); + if (gen->gi_weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) gen); - Py_DECREF(gen->gi_frame); + + + _PyObject_GC_TRACK(self); + + if (gen->gi_frame->f_stacktop!=NULL) { + /* Generator is paused, so we need to close */ + gen->ob_type->tp_del(self); + if (self->ob_refcnt > 0) + return; /* resurrected. :( */ + } + + _PyObject_GC_UNTRACK(self); + Py_XDECREF(gen->gi_frame); PyObject_GC_Del(gen); } + static PyObject * -gen_iternext(PyGenObject *gen) +gen_send_ex(PyGenObject *gen, PyObject *arg, int exc) { PyThreadState *tstate = PyThreadState_GET(); PyFrameObject *f = gen->gi_frame; @@ -34,8 +50,24 @@ "generator already executing"); return NULL; } - if (f->f_stacktop == NULL) + if ((PyObject *)f == Py_None || f->f_stacktop == NULL) { + /* Only set exception if called from send() */ + if (arg && !exc) PyErr_SetNone(PyExc_StopIteration); return NULL; + } + + if (f->f_lasti == -1) { + if (arg && arg != Py_None) { + PyErr_SetString(PyExc_TypeError, + "can't send non-None value to a just-started generator"); + return NULL; + } + } else { + /* Push arg onto the frame's value stack */ + result = arg ? arg : Py_None; + Py_INCREF(result); + *(f->f_stacktop++) = result; + } /* Generators always return to their most recent caller, not * necessarily their creator. 
*/ @@ -44,13 +76,13 @@ f->f_back = tstate->frame; gen->gi_running = 1; - result = PyEval_EvalFrame(f); + result = PyEval_EvalFrameEx(f, exc); gen->gi_running = 0; /* Don't keep the reference to f_back any longer than necessary. It * may keep a chain of frames alive or it could create a reference * cycle. */ - assert(f->f_back != NULL); + assert(f->f_back == tstate->frame); Py_CLEAR(f->f_back); /* If the generator just returned (as opposed to yielding), signal @@ -58,17 +90,199 @@ if (result == Py_None && f->f_stacktop == NULL) { Py_DECREF(result); result = NULL; + /* Set exception if not called by gen_iternext() */ + if (arg) PyErr_SetNone(PyExc_StopIteration); + } + + if (!result || f->f_stacktop == NULL) { + /* generator can't be rerun, so release the frame */ + Py_DECREF(f); + gen->gi_frame = (PyFrameObject *)Py_None; + Py_INCREF(Py_None); } return result; } +PyDoc_STRVAR(send_doc, +"send(arg) -> send 'arg' into generator, return next yielded value or raise StopIteration."); + +static PyObject * +gen_send(PyGenObject *gen, PyObject *arg) +{ + return gen_send_ex(gen, arg, 0); +} + +PyDoc_STRVAR(close_doc, +"close(arg) -> raise GeneratorExit inside generator."); + +static PyObject * +gen_close(PyGenObject *gen, PyObject *args) +{ + PyObject *retval; + PyErr_SetNone(PyExc_GeneratorExit); + retval = gen_send_ex(gen, Py_None, 1); + if (retval) { + Py_DECREF(retval); + PyErr_SetString(PyExc_RuntimeError, + "generator ignored GeneratorExit"); + return NULL; + } + if ( PyErr_ExceptionMatches(PyExc_StopIteration) + || PyErr_ExceptionMatches(PyExc_GeneratorExit) ) + { + PyErr_Clear(); /* ignore these errors */ + Py_INCREF(Py_None); + return Py_None; + } + return NULL; +} + +static void +gen_del(PyObject *self) +{ + PyObject *res; + PyObject *error_type, *error_value, *error_traceback; + PyGenObject *gen = (PyGenObject *)self; + + if ((PyObject *)gen->gi_frame == Py_None || gen->gi_frame->f_stacktop==NULL) + /* Generator isn't paused, so no need to close */ + return; + + 
/* Temporarily resurrect the object. */ + assert(self->ob_refcnt == 0); + self->ob_refcnt = 1; + + /* Save the current exception, if any. */ + PyErr_Fetch(&error_type, &error_value, &error_traceback); + + res = gen_close((PyGenObject *)self, NULL); + + if (res == NULL) + PyErr_WriteUnraisable((PyObject *)self); + else + Py_DECREF(res); + + /* Restore the saved exception. */ + PyErr_Restore(error_type, error_value, error_traceback); + + /* Undo the temporary resurrection; can't use DECREF here, it would + * cause a recursive call. + */ + assert(self->ob_refcnt > 0); + if (--self->ob_refcnt == 0) + return; /* this is the normal path out */ + + /* close() resurrected it! Make it look like the original Py_DECREF + * never happened. + */ + { + int refcnt = self->ob_refcnt; + _Py_NewReference(self); + self->ob_refcnt = refcnt; + } + assert(!PyType_IS_GC(self->ob_type) || + _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); + + /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so + * we need to undo that. */ + _Py_DEC_REFTOTAL; + /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object + * chain, so no more to do there. + * If COUNT_ALLOCS, the original decref bumped tp_frees, and + * _Py_NewReference bumped tp_allocs: both of those need to be + * undone. 
+ */ +#ifdef COUNT_ALLOCS + --self->ob_type->tp_frees; + --self->ob_type->tp_allocs; +#endif +} + + + +PyDoc_STRVAR(throw_doc, +"throw(typ[,val[,tb]]) -> raise exception in generator, return next yielded value or raise StopIteration."); + +static PyObject * +gen_throw(PyGenObject *gen, PyObject *args) +{ + PyObject *typ; + PyObject *tb = NULL; + PyObject *val = NULL; + + if (!PyArg_ParseTuple(args, "O|OO:throw", &typ, &val, &tb)) + return NULL; + + if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "throw() third argument must be a traceback object"); + return NULL; + } + + Py_INCREF(typ); + Py_XINCREF(val); + Py_XINCREF(tb); + + if (PyClass_Check(typ)) { + PyErr_NormalizeException(&typ, &val, &tb); + } + + else if (PyInstance_Check(typ)) { + /* Raising an instance. The value should be a dummy. */ + if (val && val != Py_None) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto failed_throw; + } + else { + /* Normalize to raise <class>, <instance> */ + val = typ; + typ = (PyObject*) ((PyInstanceObject*)typ)->in_class; + Py_INCREF(typ); + } + } + else { + /* Not something you can raise.
You get an exception + anyway, just not what you specified :-) */ + PyErr_Format(PyExc_TypeError, + "exceptions must be classes, or instances, not %s", + typ->ob_type->tp_name); + goto failed_throw; + } + + PyErr_Restore(typ,val,tb); + return gen_send_ex(gen, Py_None, 1); + +failed_throw: + /* Didn't use our arguments, so restore their original refcounts */ + Py_DECREF(typ); + Py_XDECREF(val); + Py_XDECREF(tb); + return NULL; +} + + +static PyObject * +gen_iternext(PyGenObject *gen) +{ + return gen_send_ex(gen, NULL, 0); +} + + static PyMemberDef gen_memberlist[] = { {"gi_frame", T_OBJECT, offsetof(PyGenObject, gi_frame), RO}, {"gi_running", T_INT, offsetof(PyGenObject, gi_running), RO}, {NULL} /* Sentinel */ }; +static PyMethodDef gen_methods[] = { + {"send",(PyCFunction)gen_send, METH_O, send_doc}, + {"throw",(PyCFunction)gen_throw, METH_VARARGS, throw_doc}, + {"close",(PyCFunction)gen_close, METH_NOARGS, close_doc}, + {NULL, NULL} /* Sentinel */ +}; + PyTypeObject PyGen_Type = { PyObject_HEAD_INIT(&PyType_Type) 0, /* ob_size */ @@ -99,11 +313,26 @@ offsetof(PyGenObject, gi_weakreflist), /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)gen_iternext, /* tp_iternext */ - 0, /* tp_methods */ + gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ + + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + gen_del, /* tp_del */ }; PyObject * Index: intobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/intobject.c,v retrieving revision 2.84.2.2 retrieving revision 2.84.2.3 diff -u -d -r2.84.2.2 -r2.84.2.3 --- intobject.c 7 Jan 2005 07:04:02 -0000 2.84.2.2 +++ intobject.c 16 Oct 2005 05:24:04 
-0000 2.84.2.3 @@ -826,7 +826,10 @@ static PyObject * int_int(PyIntObject *v) { - Py_INCREF(v); + if (PyInt_CheckExact(v)) + Py_INCREF(v); + else + v = (PyIntObject *)PyInt_FromLong(v->ob_ival); return (PyObject *)v; } Index: iterobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/iterobject.c,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- iterobject.c 7 Jan 2005 07:04:02 -0000 1.10.2.2 +++ iterobject.c 16 Oct 2005 05:24:04 -0000 1.10.2.3 @@ -71,7 +71,7 @@ return NULL; } -static int +static PyObject * iter_len(seqiterobject *it) { int seqsize, len; @@ -79,17 +79,19 @@ if (it->it_seq) { seqsize = PySequence_Size(it->it_seq); if (seqsize == -1) - return -1; + return NULL; len = seqsize - it->it_index; if (len >= 0) - return len; + return PyInt_FromLong(len); } - return 0; + return PyInt_FromLong(0); } -static PySequenceMethods iter_as_sequence = { - (inquiry)iter_len, /* sq_length */ - 0, /* sq_concat */ +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef seqiter_methods[] = { + {"_length_cue", (PyCFunction)iter_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ }; PyTypeObject PySeqIter_Type = { @@ -106,7 +108,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &iter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -122,13 +124,8 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)iter_iternext, /* tp_iternext */ - 0, /* tp_methods */ + seqiter_methods, /* tp_methods */ 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ }; /* -------------------------------------- */ @@ -236,10 +233,4 @@ PyObject_SelfIter, /* tp_iter */ (iternextfunc)calliter_iternext, /* tp_iternext */ 0, /* tp_methods */ 
- 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ }; Index: listobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/listobject.c,v retrieving revision 2.114.2.2 retrieving revision 2.114.2.3 diff -u -d -r2.114.2.2 -r2.114.2.3 --- listobject.c 7 Jan 2005 07:04:02 -0000 2.114.2.2 +++ listobject.c 16 Oct 2005 05:24:04 -0000 2.114.2.3 @@ -775,8 +775,13 @@ iternext = *it->ob_type->tp_iternext; /* Guess a result list size. */ - n = PyObject_Size(b); + n = _PyObject_LengthCue(b); if (n < 0) { + if (!PyErr_ExceptionMatches(PyExc_TypeError) && + !PyErr_ExceptionMatches(PyExc_AttributeError)) { + Py_DECREF(it); + return NULL; + } PyErr_Clear(); n = 8; /* arbitrary */ } @@ -2759,21 +2764,23 @@ return NULL; } -static int +static PyObject * listiter_len(listiterobject *it) { int len; if (it->it_seq) { len = PyList_GET_SIZE(it->it_seq) - it->it_index; if (len >= 0) - return len; + return PyInt_FromLong((long)len); } - return 0; + return PyInt_FromLong(0); } -static PySequenceMethods listiter_as_sequence = { - (inquiry)listiter_len, /* sq_length */ - 0, /* sq_concat */ +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef listiter_methods[] = { + {"_length_cue", (PyCFunction)listiter_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ }; PyTypeObject PyListIter_Type = { @@ -2790,7 +2797,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &listiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -2806,13 +2813,8 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)listiter_next, /* tp_iternext */ - 0, /* tp_methods */ + listiter_methods, /* tp_methods */ 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get 
*/ - 0, /* tp_descr_set */ }; /*********************** List Reverse Iterator **************************/ Index: listsort.txt =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/listsort.txt,v retrieving revision 1.6.10.1 retrieving revision 1.6.10.2 diff -u -d -r1.6.10.1 -r1.6.10.2 --- listsort.txt 28 Apr 2003 17:18:16 -0000 1.6.10.1 +++ listsort.txt 16 Oct 2005 05:24:04 -0000 1.6.10.2 @@ -54,6 +54,16 @@ + Here are exact comparison counts across all the tests in sortperf.py, when run with arguments "15 20 1". + Column Key: + *sort: random data + \sort: descending data + /sort: ascending data + 3sort: ascending, then 3 random exchanges + +sort: ascending, then 10 random at the end + ~sort: many duplicates + =sort: all equal + !sort: worst case scenario + First the trivial cases, trivial for samplesort because it special-cased them, and trivial for timsort because it naturally works on runs. Within an "n" block, the first line gives the # of compares done by samplesort, Index: longobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/longobject.c,v retrieving revision 1.118.2.2 retrieving revision 1.118.2.3 diff -u -d -r1.118.2.2 -r1.118.2.3 --- longobject.c 7 Jan 2005 07:04:03 -0000 1.118.2.2 +++ longobject.c 16 Oct 2005 05:24:04 -0000 1.118.2.3 @@ -783,9 +783,30 @@ return -1; } if (!PyLong_Check(vv)) { + PyNumberMethods *nb; + PyObject *io; if (PyInt_Check(vv)) return (PY_LONG_LONG)PyInt_AsLong(vv); - PyErr_BadInternalCall(); + if ((nb = vv->ob_type->tp_as_number) == NULL || + nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return -1; + } + io = (*nb->nb_int) (vv); + if (io == NULL) + return -1; + if (PyInt_Check(io)) { + bytes = PyInt_AsLong(io); + Py_DECREF(io); + return bytes; + } + if (PyLong_Check(io)) { + bytes = PyLong_AsLongLong(io); + Py_DECREF(io); + return bytes; + } + 
Py_DECREF(io); + PyErr_SetString(PyExc_TypeError, "integer conversion failed"); return -1; } @@ -1069,7 +1090,7 @@ assert(accumbits >= basebits); do { char cdigit = (char)(accum & (base - 1)); - cdigit += (cdigit < 10) ? '0' : 'A'-10; + cdigit += (cdigit < 10) ? '0' : 'a'-10; assert(p > PyString_AS_STRING(str)); *--p = cdigit; accumbits -= basebits; @@ -1123,7 +1144,7 @@ digit nextrem = (digit)(rem / base); char c = (char)(rem - nextrem * base); assert(p > PyString_AS_STRING(str)); - c += (c < 10) ? '0' : 'A'-10; + c += (c < 10) ? '0' : 'a'-10; *--p = c; rem = nextrem; --ntostore; @@ -2339,8 +2360,11 @@ c = (PyLongObject *)x; Py_INCREF(x); } - else if (PyInt_Check(x)) + else if (PyInt_Check(x)) { c = (PyLongObject *)PyLong_FromLong(PyInt_AS_LONG(x)); + if (c == NULL) + goto Error; + } else if (x == Py_None) c = NULL; else { @@ -2490,14 +2514,14 @@ } /* fall through */ Done: - Py_XDECREF(a); - Py_XDECREF(b); - Py_XDECREF(c); - Py_XDECREF(temp); if (b->ob_size > FIVEARY_CUTOFF) { for (i = 0; i < 32; ++i) Py_XDECREF(table[i]); } + Py_DECREF(a); + Py_DECREF(b); + Py_XDECREF(c); + Py_XDECREF(temp); return (PyObject *)z; } @@ -2840,7 +2864,10 @@ static PyObject * long_long(PyObject *v) { - Py_INCREF(v); + if (PyLong_CheckExact(v)) + Py_INCREF(v); + else + v = _PyLong_Copy((PyLongObject *)v); return v; } Index: object.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/object.c,v retrieving revision 2.179.2.3 retrieving revision 2.179.2.4 diff -u -d -r2.179.2.3 -r2.179.2.4 --- object.c 7 Jan 2005 07:04:04 -0000 2.179.2.3 +++ object.c 16 Oct 2005 05:24:04 -0000 2.179.2.4 @@ -331,22 +331,48 @@ } PyObject * -PyObject_Str(PyObject *v) +_PyObject_Str(PyObject *v) { PyObject *res; - + int type_ok; if (v == NULL) return PyString_FromString("<NULL>"); if (PyString_CheckExact(v)) { Py_INCREF(v); return v; } +#ifdef Py_USING_UNICODE + if (PyUnicode_CheckExact(v)) { + Py_INCREF(v); + return v; + } +#endif if
(v->ob_type->tp_str == NULL) return PyObject_Repr(v); res = (*v->ob_type->tp_str)(v); if (res == NULL) return NULL; + type_ok = PyString_Check(res); +#ifdef Py_USING_UNICODE + type_ok = type_ok || PyUnicode_Check(res); +#endif + if (!type_ok) { + PyErr_Format(PyExc_TypeError, + "__str__ returned non-string (type %.200s)", + res->ob_type->tp_name); + Py_DECREF(res); + return NULL; + } + return res; +} + +PyObject * +PyObject_Str(PyObject *v) +{ + PyObject *res = _PyObject_Str(v); + if (res == NULL) + return NULL; #ifdef Py_USING_UNICODE if (PyUnicode_Check(res)) { PyObject* str; @@ -358,13 +384,7 @@ return NULL; } #endif - if (!PyString_Check(res)) { - PyErr_Format(PyExc_TypeError, - "__str__ returned non-string (type %.200s)", - res->ob_type->tp_name); - Py_DECREF(res); - return NULL; - } + assert(PyString_Check(res)); return res; } @@ -373,6 +393,8 @@ PyObject_Unicode(PyObject *v) { PyObject *res; + PyObject *func; + static PyObject *unicodestr; if (v == NULL) res = PyString_FromString("<NULL>"); @@ -380,35 +402,32 @@ Py_INCREF(v); return v; } - if (PyUnicode_Check(v)) { - /* For a Unicode subtype that's not a Unicode object, - return a true Unicode object with the same data. */ - return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v), - PyUnicode_GET_SIZE(v)); + /* XXX As soon as we have a tp_unicode slot, we should + check this before trying the __unicode__ + method. */ + if (unicodestr == NULL) { + unicodestr= PyString_InternFromString("__unicode__"); + if (unicodestr == NULL) + return NULL; + } + func = PyObject_GetAttr(v, unicodestr); + if (func != NULL) { + res = PyEval_CallObject(func, (PyObject *)NULL); + Py_DECREF(func); } - if (PyString_Check(v)) { - Py_INCREF(v); - res = v; - } else { - PyObject *func; - static PyObject *unicodestr; - /* XXX As soon as we have a tp_unicode slot, we should - check this before trying the __unicode__ - method.
*/ - if (unicodestr == NULL) { - unicodestr= PyString_InternFromString( - "__unicode__"); - if (unicodestr == NULL) - return NULL; + PyErr_Clear(); + if (PyUnicode_Check(v)) { + /* For a Unicode subtype that's didn't overwrite __unicode__, + return a true Unicode object with the same data. */ + return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v), + PyUnicode_GET_SIZE(v)); } - func = PyObject_GetAttr(v, unicodestr); - if (func != NULL) { - res = PyEval_CallObject(func, (PyObject *)NULL); - Py_DECREF(func); + if (PyString_CheckExact(v)) { + Py_INCREF(v); + res = v; } else { - PyErr_Clear(); if (v->ob_type->tp_str != NULL) res = (*v->ob_type->tp_str)(v); else @@ -424,7 +443,7 @@ if (str) res = str; else - return NULL; + return NULL; } return res; } Index: obmalloc.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/obmalloc.c,v retrieving revision 2.45.2.2 retrieving revision 2.45.2.3 diff -u -d -r2.45.2.2 -r2.45.2.3 --- obmalloc.c 7 Jan 2005 07:04:04 -0000 2.45.2.2 +++ obmalloc.c 16 Oct 2005 05:24:04 -0000 2.45.2.3 @@ -139,9 +139,9 @@ * getpagesize() call or deduced from various header files. To make * things simpler, we assume that it is 4K, which is OK for most systems. * It is probably better if this is the native page size, but it doesn't - * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page - * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation - * violation fault. 4K is apparently OK for all the platforms that python + * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page + * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation + * violation fault. 4K is apparently OK for all the platforms that python * currently targets. */ #define SYSTEM_PAGE_SIZE (4 * 1024) @@ -841,30 +841,26 @@ } return bp; } - /* We're not managing this block. 
*/ - if (nbytes <= SMALL_REQUEST_THRESHOLD) { - /* Take over this block -- ask for at least one byte so - * we really do take it over (PyObject_Malloc(0) goes to - * the system malloc). - */ - bp = PyObject_Malloc(nbytes ? nbytes : 1); - if (bp != NULL) { - memcpy(bp, p, nbytes); - free(p); - } - else if (nbytes == 0) { - /* Meet the doc's promise that nbytes==0 will - * never return a NULL pointer when p isn't NULL. - */ - bp = p; - } - - } - else { - assert(nbytes != 0); - bp = realloc(p, nbytes); - } - return bp; + /* We're not managing this block. If nbytes <= + * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this + * block. However, if we do, we need to copy the valid data from + * the C-managed block to one of our blocks, and there's no portable + * way to know how much of the memory space starting at p is valid. + * As bug 1185883 pointed out the hard way, it's possible that the + * C-managed block is "at the end" of allocated VM space, so that + * a memory fault can occur if we try to copy nbytes bytes starting + * at p. Instead we punt: let C continue to manage this block. + */ + if (nbytes) + return realloc(p, nbytes); + /* C doesn't define the result of realloc(p, 0) (it may or may not + * return NULL then), but Python's docs promise that nbytes==0 never + * returns NULL. We don't pass 0 to realloc(), to avoid that endcase + * to begin with. Even then, we can't be sure that realloc() won't + * return NULL. + */ + bp = realloc(p, 1); + return bp ? bp : p; } #else /* ! 
WITH_PYMALLOC */ Index: rangeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/rangeobject.c,v retrieving revision 2.41.2.2 retrieving revision 2.41.2.3 diff -u -d -r2.41.2.2 -r2.41.2.3 --- rangeobject.c 7 Jan 2005 07:04:04 -0000 2.41.2.2 +++ rangeobject.c 16 Oct 2005 05:24:04 -0000 2.41.2.3 @@ -45,6 +45,9 @@ long ilow = 0, ihigh = 0, istep = 1; long n; + if (!_PyArg_NoKeywords("xrange()", kw)) + return NULL; + if (PyTuple_Size(args) <= 1) { if (!PyArg_ParseTuple(args, "l;xrange() requires 1-3 int arguments", @@ -259,17 +262,18 @@ return NULL; } -static int +static PyObject * rangeiter_len(rangeiterobject *r) { - return r->len - r->index; + return PyInt_FromLong(r->len - r->index); } -static PySequenceMethods rangeiter_as_sequence = { - (inquiry)rangeiter_len, /* sq_length */ - 0, /* sq_concat */ -}; +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); +static PyMethodDef rangeiter_methods[] = { + {"_length_cue", (PyCFunction)rangeiter_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ +}; static PyTypeObject Pyrangeiter_Type = { PyObject_HEAD_INIT(&PyType_Type) @@ -285,7 +289,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &rangeiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -301,5 +305,6 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)rangeiter_next, /* tp_iternext */ - 0, /* tp_methods */ + rangeiter_methods, /* tp_methods */ + 0, }; Index: setobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/setobject.c,v retrieving revision 1.31.4.1 retrieving revision 1.31.4.2 diff -u -d -r1.31.4.1 -r1.31.4.2 --- setobject.c 7 Jan 2005 07:04:04 -0000 1.31.4.1 +++ setobject.c 16 Oct 2005 05:24:04 -0000 1.31.4.2 @@ -1,41 +1,882 @@ -#include "Python.h" 
-#include "structmember.h" /* set object implementation - written and maintained by Raymond D. Hettinger - derived from sets.py written by Greg V. Wilson, Alex Martelli, - Guido van Rossum, Raymond Hettinger, and Tim Peters. + Written and maintained by Raymond D. Hettinger + Derived from Lib/sets.py and Objects/dictobject.c. - Copyright (c) 2003 Python Software Foundation. [...2120 lines suppressed...] + + /* Verify constructors accept NULL arguments */ + f = PySet_New(NULL); + assert(f != NULL); + assert(PySet_GET_SIZE(f) == 0); + Py_DECREF(f); + f = PyFrozenSet_New(NULL); + assert(f != NULL); + assert(PyFrozenSet_CheckExact(f)); + assert(PySet_GET_SIZE(f) == 0); + Py_DECREF(f); + + Py_DECREF(elem); + Py_DECREF(dup); + Py_RETURN_TRUE; +} + +#undef assertRaises + +#endif Index: sliceobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/sliceobject.c,v retrieving revision 2.15.2.2 retrieving revision 2.15.2.3 diff -u -d -r2.15.2.2 -r2.15.2.3 --- sliceobject.c 7 Jan 2005 07:04:05 -0000 2.15.2.2 +++ sliceobject.c 16 Oct 2005 05:24:04 -0000 2.15.2.3 @@ -174,6 +174,9 @@ start = stop = step = NULL; + if (!_PyArg_NoKeywords("slice()", kw)) + return NULL; + if (!PyArg_UnpackTuple(args, "slice", 1, 3, &start, &stop, &step)) return NULL; Index: stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.168.2.2 retrieving revision 2.168.2.3 diff -u -d -r2.168.2.2 -r2.168.2.3 --- stringobject.c 7 Jan 2005 07:04:05 -0000 2.168.2.2 +++ stringobject.c 16 Oct 2005 05:24:04 -0000 2.168.2.3 @@ -52,6 +52,7 @@ PyString_FromStringAndSize(const char *str, int size) { register PyStringObject *op; + assert(size >= 0); if (size == 0 && (op = nullstring) != NULL) { #ifdef COUNT_ALLOCS null_strings++; @@ -1001,8 +1002,12 @@ static int string_contains(PyObject *a, PyObject *el) { - const char *lhs, *rhs, *end; 
- int size; + char *s = PyString_AS_STRING(a); + const char *sub = PyString_AS_STRING(el); + char *last; + int len_sub = PyString_GET_SIZE(el); + int shortsub; + char firstchar, lastchar; if (!PyString_CheckExact(el)) { #ifdef Py_USING_UNICODE @@ -1015,20 +1020,29 @@ return -1; } } - size = PyString_GET_SIZE(el); - rhs = PyString_AS_STRING(el); - lhs = PyString_AS_STRING(a); - - /* optimize for a single character */ - if (size == 1) - return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL; - end = lhs + (PyString_GET_SIZE(a) - size); - while (lhs <= end) { - if (memcmp(lhs++, rhs, size) == 0) + if (len_sub == 0) + return 1; + /* last points to one char beyond the start of the rightmost + substring. When s= m) + break; + t = memchr(s+i, sub[0], m-i); + if (t == NULL) + break; + i = t - s; } return PyInt_FromLong((long) r); } - PyDoc_STRVAR(swapcase__doc__, "S.swapcase() -> string\n\ \n\ @@ -3734,18 +3753,12 @@ } /* Fix up case for hex conversions. */ - switch (type) { - case 'x': - /* Need to convert all upper case letters to lower case. */ + if (type == 'X') { + /* Need to convert all lower case letters to upper case. + and need to convert 0x to 0X (and -0x to -0X). */ for (i = 0; i < len; i++) - if (buf[i] >= 'A' && buf[i] <= 'F') - buf[i] += 'a'-'A'; - break; - case 'X': - /* Need to convert 0x to 0X (and -0x to -0X). */ - if (buf[sign + 1] == 'x') - buf[sign + 1] = 'X'; - break; + if (buf[i] >= 'a' && buf[i] <= 'x') + buf[i] -= 'a'-'A'; } *pbuf = buf; *plen = len; @@ -3840,7 +3853,6 @@ return 1; } - /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) 
FORMATBUFLEN is the length of the buffer in which the floats, ints, & @@ -4072,18 +4084,22 @@ goto unicode; } #endif + temp = _PyObject_Str(v); +#ifdef Py_USING_UNICODE + if (temp != NULL && PyUnicode_Check(temp)) { + Py_DECREF(temp); + fmt = fmt_start; + argidx = argidx_start; + goto unicode; + } +#endif /* Fall through */ case 'r': - if (c == 's') - temp = PyObject_Str(v); - else + if (c == 'r') temp = PyObject_Repr(v); if (temp == NULL) goto error; if (!PyString_Check(temp)) { - /* XXX Note: this should never happen, - since PyObject_Repr() and - PyObject_Str() assure this */ PyErr_SetString(PyExc_TypeError, "%s argument has non-string str()"); Py_DECREF(temp); Index: tupleobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/tupleobject.c,v retrieving revision 2.68.2.2 retrieving revision 2.68.2.3 diff -u -d -r2.68.2.2 -r2.68.2.3 --- tupleobject.c 7 Jan 2005 07:04:06 -0000 2.68.2.2 +++ tupleobject.c 16 Oct 2005 05:24:04 -0000 2.68.2.3 @@ -851,17 +851,20 @@ return NULL; } -static int +static PyObject * tupleiter_len(tupleiterobject *it) { + int len = 0; if (it->it_seq) - return PyTuple_GET_SIZE(it->it_seq) - it->it_index; - return 0; + len = PyTuple_GET_SIZE(it->it_seq) - it->it_index; + return PyInt_FromLong(len); } -static PySequenceMethods tupleiter_as_sequence = { - (inquiry)tupleiter_len, /* sq_length */ - 0, /* sq_concat */ +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef tupleiter_methods[] = { + {"_length_cue", (PyCFunction)tupleiter_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ }; PyTypeObject PyTupleIter_Type = { @@ -878,7 +881,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &tupleiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -894,4 +897,6 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ 
(iternextfunc)tupleiter_next, /* tp_iternext */ + tupleiter_methods, /* tp_methods */ + 0, }; Index: typeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/typeobject.c,v retrieving revision 2.157.2.3 retrieving revision 2.157.2.4 diff -u -d -r2.157.2.3 -r2.157.2.4 --- typeobject.c 7 Jan 2005 07:04:06 -0000 2.157.2.3 +++ typeobject.c 16 Oct 2005 05:24:04 -0000 2.157.2.4 @@ -2516,7 +2516,7 @@ clsdict = ((PyTypeObject *)cls)->tp_dict; slotnames = PyDict_GetItemString(clsdict, "__slotnames__"); - if (slotnames != NULL) { + if (slotnames != NULL && PyList_Check(slotnames)) { Py_INCREF(slotnames); return slotnames; } @@ -4065,14 +4065,24 @@ { static PyObject *len_str; PyObject *res = call_method(self, "__len__", &len_str, "()"); + long temp; int len; if (res == NULL) return -1; - len = (int)PyInt_AsLong(res); + temp = PyInt_AsLong(res); + len = (int)temp; Py_DECREF(res); if (len == -1 && PyErr_Occurred()) return -1; +#if SIZEOF_INT < SIZEOF_LONG + /* Overflow check -- range of PyInt is more than C int */ + if (len != temp) { + PyErr_SetString(PyExc_OverflowError, + "__len__() should return 0 <= outcome < 2**31"); + return -1; + } +#endif if (len < 0) { PyErr_SetString(PyExc_ValueError, "__len__() should return >= 0"); @@ -4747,6 +4757,12 @@ Py_DECREF(meth); if (res == NULL) return -1; + if (res != Py_None) { + PyErr_SetString(PyExc_TypeError, + "__init__() should return None"); + Py_DECREF(res); + return -1; + } Py_DECREF(res); return 0; } @@ -4896,6 +4912,12 @@ #define RBINSLOT(NAME, SLOT, FUNCTION, DOC) \ ETSLOT(NAME, as_number.SLOT, FUNCTION, wrap_binaryfunc_r, \ "x." NAME "(y) <==> y" DOC "x") +#define BINSLOTNOTINFIX(NAME, SLOT, FUNCTION, DOC) \ + ETSLOT(NAME, as_number.SLOT, FUNCTION, wrap_binaryfunc_l, \ + "x." NAME "(y) <==> " DOC) +#define RBINSLOTNOTINFIX(NAME, SLOT, FUNCTION, DOC) \ + ETSLOT(NAME, as_number.SLOT, FUNCTION, wrap_binaryfunc_r, \ + "x." 
NAME "(y) <==> " DOC) static slotdef slotdefs[] = { SQSLOT("__len__", sq_length, slot_sq_length, wrap_inquiry, @@ -4964,9 +4986,9 @@ "%"), RBINSLOT("__rmod__", nb_remainder, slot_nb_remainder, "%"), - BINSLOT("__divmod__", nb_divmod, slot_nb_divmod, + BINSLOTNOTINFIX("__divmod__", nb_divmod, slot_nb_divmod, "divmod(x, y)"), - RBINSLOT("__rdivmod__", nb_divmod, slot_nb_divmod, + RBINSLOTNOTINFIX("__rdivmod__", nb_divmod, slot_nb_divmod, "divmod(y, x)"), NBSLOT("__pow__", nb_power, slot_nb_power, wrap_ternaryfunc, "x.__pow__(y[, z]) <==> pow(x, y[, z])"), @@ -5636,7 +5658,7 @@ return self; } if (su->ob_type != &PySuper_Type) - /* If su is not an instance of a subclass of super, + /* If su is an instance of a (strict) subclass of super, call its type */ return PyObject_CallFunction((PyObject *)su->ob_type, "OO", su->type, obj); Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.155.2.2 retrieving revision 2.155.2.3 diff -u -d -r2.155.2.2 -r2.155.2.3 --- unicodeobject.c 7 Jan 2005 07:04:09 -0000 2.155.2.2 +++ unicodeobject.c 16 Oct 2005 05:24:04 -0000 2.155.2.3 @@ -2273,6 +2273,81 @@ PyUnicode_GET_SIZE(unicode)); } +/* --- Unicode Internal Codec ------------------------------------------- */ + +PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, + int size, + const char *errors) +{ + const char *starts = s; + int startinpos; + int endinpos; + int outpos; + Py_UNICODE unimax; + PyUnicodeObject *v; + Py_UNICODE *p; + const char *end; + const char *reason; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + unimax = PyUnicode_GetMax(); + v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE); + if (v == NULL) + goto onError; + if (PyUnicode_GetSize((PyObject *)v) == 0) + return (PyObject *)v; + p = PyUnicode_AS_UNICODE(v); + end = s + size; + + while (s < end) { + *p = *(Py_UNICODE *)s; + /* We have to sanity check the raw 
data, otherwise doom looms for + some malformed UCS-4 data. */ + if ( + #ifdef Py_UNICODE_WIDE + *p > unimax || *p < 0 || + #endif + end-s < Py_UNICODE_SIZE + ) + { + startinpos = s - starts; + if (end-s < Py_UNICODE_SIZE) { + endinpos = end-starts; + reason = "truncated input"; + } + else { + endinpos = s - starts + Py_UNICODE_SIZE; + reason = "illegal code point (> 0x10FFFF)"; + } + outpos = p - PyUnicode_AS_UNICODE(v); + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicode_internal", reason, + starts, size, &startinpos, &endinpos, &exc, &s, + (PyObject **)&v, &outpos, &p)) { + goto onError; + } + } + else { + p++; + s += Py_UNICODE_SIZE; + } + } + + if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0) + goto onError; + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return (PyObject *)v; + + onError: + Py_XDECREF(v); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + /* --- Latin-1 Codec ------------------------------------------------------ */ PyObject *PyUnicode_DecodeLatin1(const char *s, @@ -2758,6 +2833,8 @@ int extrachars = 0; PyObject *errorHandler = NULL; PyObject *exc = NULL; + Py_UNICODE *mapstring = NULL; + int maplen = 0; /* Default to Latin-1 */ if (mapping == NULL) @@ -2770,91 +2847,121 @@ return (PyObject *)v; p = PyUnicode_AS_UNICODE(v); e = s + size; - while (s < e) { - unsigned char ch = *s; - PyObject *w, *x; + if (PyUnicode_CheckExact(mapping)) { + mapstring = PyUnicode_AS_UNICODE(mapping); + maplen = PyUnicode_GET_SIZE(mapping); + while (s < e) { + unsigned char ch = *s; + Py_UNICODE x = 0xfffe; /* illegal value */ - /* Get mapping (char ordinal -> integer, Unicode char or None) */ - w = PyInt_FromLong((long)ch); - if (w == NULL) - goto onError; - x = PyObject_GetItem(mapping, w); - Py_DECREF(w); - if (x == NULL) { - if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found means: mapping is undefined. 
*/ - PyErr_Clear(); - x = Py_None; - Py_INCREF(x); - } else - goto onError; - } + if (ch < maplen) + x = mapstring[ch]; - /* Apply mapping */ - if (PyInt_Check(x)) { - long value = PyInt_AS_LONG(x); - if (value < 0 || value > 65535) { - PyErr_SetString(PyExc_TypeError, - "character mapping must be in range(65536)"); - Py_DECREF(x); - goto onError; + if (x == 0xfffe) { + /* undefined mapping */ + outpos = p-PyUnicode_AS_UNICODE(v); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to ", + starts, size, &startinpos, &endinpos, &exc, &s, + (PyObject **)&v, &outpos, &p)) { + goto onError; + } + continue; } - *p++ = (Py_UNICODE)value; + *p++ = x; + ++s; } - else if (x == Py_None) { - /* undefined mapping */ - outpos = p-PyUnicode_AS_UNICODE(v); - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "charmap", "character maps to ", - starts, size, &startinpos, &endinpos, &exc, &s, - (PyObject **)&v, &outpos, &p)) { - Py_DECREF(x); + } + else { + while (s < e) { + unsigned char ch = *s; + PyObject *w, *x; + + /* Get mapping (char ordinal -> integer, Unicode char or None) */ + w = PyInt_FromLong((long)ch); + if (w == NULL) goto onError; + x = PyObject_GetItem(mapping, w); + Py_DECREF(w); + if (x == NULL) { + if (PyErr_ExceptionMatches(PyExc_LookupError)) { + /* No mapping found means: mapping is undefined. 
*/ + PyErr_Clear(); + x = Py_None; + Py_INCREF(x); + } else + goto onError; } - continue; - } - else if (PyUnicode_Check(x)) { - int targetsize = PyUnicode_GET_SIZE(x); - - if (targetsize == 1) - /* 1-1 mapping */ - *p++ = *PyUnicode_AS_UNICODE(x); - - else if (targetsize > 1) { - /* 1-n mapping */ - if (targetsize > extrachars) { - /* resize first */ - int oldpos = (int)(p - PyUnicode_AS_UNICODE(v)); - int needed = (targetsize - extrachars) + \ - (targetsize << 2); - extrachars += needed; - if (_PyUnicode_Resize(&v, - PyUnicode_GET_SIZE(v) + needed) < 0) { - Py_DECREF(x); - goto onError; + + /* Apply mapping */ + if (PyInt_Check(x)) { + long value = PyInt_AS_LONG(x); + if (value < 0 || value > 65535) { + PyErr_SetString(PyExc_TypeError, + "character mapping must be in range(65536)"); + Py_DECREF(x); + goto onError; + } + *p++ = (Py_UNICODE)value; + } + else if (x == Py_None) { + /* undefined mapping */ + outpos = p-PyUnicode_AS_UNICODE(v); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to ", + starts, size, &startinpos, &endinpos, &exc, &s, + (PyObject **)&v, &outpos, &p)) { + Py_DECREF(x); + goto onError; + } + continue; + } + else if (PyUnicode_Check(x)) { + int targetsize = PyUnicode_GET_SIZE(x); + + if (targetsize == 1) + /* 1-1 mapping */ + *p++ = *PyUnicode_AS_UNICODE(x); + + else if (targetsize > 1) { + /* 1-n mapping */ + if (targetsize > extrachars) { + /* resize first */ + int oldpos = (int)(p - PyUnicode_AS_UNICODE(v)); + int needed = (targetsize - extrachars) + \ + (targetsize << 2); + extrachars += needed; + if (_PyUnicode_Resize(&v, + PyUnicode_GET_SIZE(v) + needed) < 0) { + Py_DECREF(x); + goto onError; + } + p = PyUnicode_AS_UNICODE(v) + oldpos; } - p = PyUnicode_AS_UNICODE(v) + oldpos; + Py_UNICODE_COPY(p, + PyUnicode_AS_UNICODE(x), + targetsize); + p += targetsize; + extrachars -= targetsize; } - Py_UNICODE_COPY(p, - PyUnicode_AS_UNICODE(x), - 
targetsize); - p += targetsize; - extrachars -= targetsize; + /* 1-0 mapping: skip the character */ + } + else { + /* wrong return value */ + PyErr_SetString(PyExc_TypeError, + "character mapping must return integer, None or unicode"); + Py_DECREF(x); + goto onError; } - /* 1-0 mapping: skip the character */ - } - else { - /* wrong return value */ - PyErr_SetString(PyExc_TypeError, - "character mapping must return integer, None or unicode"); Py_DECREF(x); - goto onError; + ++s; } - Py_DECREF(x); - ++s; } if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0) Index: weakrefobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/weakrefobject.c,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- weakrefobject.c 7 Jan 2005 07:04:10 -0000 1.9.2.2 +++ weakrefobject.c 16 Oct 2005 05:24:05 -0000 1.9.2.3 @@ -505,11 +505,7 @@ PyObject *o = PyWeakref_GET_OBJECT(proxy); if (!proxy_checkref(proxy)) return -1; - if (o->ob_type->tp_as_number && - o->ob_type->tp_as_number->nb_nonzero) - return (*o->ob_type->tp_as_number->nb_nonzero)(o); - else - return 1; + return PyObject_IsTrue(o); } static void From jhylton at users.sourceforge.net Sun Oct 16 07:24:12 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:12 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python bltinmodule.c, 2.261.2.4, 2.261.2.5 ceval.c, 2.314.2.6, 2.314.2.7 compile.c, 2.247.2.3, 2.247.2.4 errors.c, 2.70.2.3, 2.70.2.4 exceptions.c, 1.32.2.2, 1.32.2.3 future.c, 2.12.2.7, 2.12.2.8 getargs.c, 2.92.2.2, 2.92.2.3 getcopyright.c, 1.16.2.2, 1.16.2.3 graminit.c, 2.33.2.3, 2.33.2.4 import.c, 2.208.2.6, 2.208.2.7 marshal.c, 1.72.2.3, 1.72.2.4 pystate.c, 2.20.18.2, 2.20.18.3 pythonrun.c, 2.161.2.18, 2.161.2.19 structmember.c, 2.23.8.1, 2.23.8.2 sysmodule.c, 2.107.2.3, 2.107.2.4 
thread.c, 2.44.2.2, 2.44.2.3 thread_nt.h, 2.22.2.1, 2.22.2.2 thread_os2.h, 2.14.2.1, 2.14.2.2 thread_pthread.h, 2.40.2.2, 2.40.2.3 thread_wince.h, 2.7, 2.7.12.1 Message-ID: <20051016052412.159FC1E4013@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Python Modified Files: Tag: ast-branch bltinmodule.c ceval.c compile.c errors.c exceptions.c future.c getargs.c getcopyright.c graminit.c import.c marshal.c pystate.c pythonrun.c structmember.c sysmodule.c thread.c thread_nt.h thread_os2.h thread_pthread.h thread_wince.h Log Message: Merge head to branch (for the last time) Index: bltinmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/bltinmodule.c,v retrieving revision 2.261.2.4 retrieving revision 2.261.2.5 diff -u -d -r2.261.2.4 -r2.261.2.5 --- bltinmodule.c 7 Jan 2005 07:04:35 -0000 2.261.2.4 +++ bltinmodule.c 16 Oct 2005 05:24:05 -0000 2.261.2.5 @@ -68,6 +68,69 @@ \n\ Return the absolute value of the argument."); +static PyObject * +builtin_all(PyObject *self, PyObject *v) +{ + PyObject *it, *item; + + it = PyObject_GetIter(v); + if (it == NULL) + return NULL; + + while ((item = PyIter_Next(it)) != NULL) { + int cmp = PyObject_IsTrue(item); + Py_DECREF(item); + if (cmp < 0) { + Py_DECREF(it); + return NULL; + } + if (cmp == 0) { + Py_DECREF(it); + Py_RETURN_FALSE; + } + } + Py_DECREF(it); + if (PyErr_Occurred()) + return NULL; + Py_RETURN_TRUE; +} + +PyDoc_STRVAR(all_doc, +"all(iterable) -> bool\n\ +\n\ +Return True if bool(x) is True for all values x in the iterable."); + +static PyObject * +builtin_any(PyObject *self, PyObject *v) +{ + PyObject *it, *item; + + it = PyObject_GetIter(v); + if (it == NULL) + return NULL; + + while ((item = PyIter_Next(it)) != NULL) { + int cmp = PyObject_IsTrue(item); + Py_DECREF(item); + if (cmp < 0) { + Py_DECREF(it); + return NULL; + } + if (cmp == 1) { + Py_DECREF(it); + 
Py_RETURN_TRUE; + } + } + Py_DECREF(it); + if (PyErr_Occurred()) + return NULL; + Py_RETURN_FALSE; +} + +PyDoc_STRVAR(any_doc, +"any(iterable) -> bool\n\ +\n\ +Return True if bool(x) is True for any x in the iterable."); static PyObject * builtin_apply(PyObject *self, PyObject *args) @@ -147,23 +210,27 @@ if (PyTuple_Check(seq)) return filtertuple(func, seq); + /* Pre-allocate argument list tuple. */ + arg = PyTuple_New(1); + if (arg == NULL) + return NULL; + /* Get iterator. */ it = PyObject_GetIter(seq); if (it == NULL) - return NULL; + goto Fail_arg; /* Guess a result list size. */ - len = PyObject_Size(seq); + len = _PyObject_LengthCue(seq); if (len < 0) { + if (!PyErr_ExceptionMatches(PyExc_TypeError) && + !PyErr_ExceptionMatches(PyExc_AttributeError)) { + goto Fail_it; + } PyErr_Clear(); len = 8; /* arbitrary */ } - /* Pre-allocate argument list tuple. */ - arg = PyTuple_New(1); - if (arg == NULL) - goto Fail_arg; - /* Get a result list. */ if (PyList_Check(seq) && seq->ob_refcnt == 1) { /* Eww - can modify the list in-place. */ @@ -462,7 +529,7 @@ return NULL; } if (globals != Py_None && !PyDict_Check(globals)) { - PyErr_SetString(PyExc_TypeError, PyMapping_Check(globals) ? + PyErr_SetString(PyExc_TypeError, PyMapping_Check(globals) ? "globals must be a real dict; try eval(expr, {}, mapping)" : "globals must be a dict"); return NULL; @@ -475,6 +542,13 @@ else if (locals == Py_None) locals = globals; + if (globals == NULL || locals == NULL) { + PyErr_SetString(PyExc_TypeError, + "eval must be given globals and locals " + "when called without a frame"); + return NULL; + } + if (PyDict_GetItemString(globals, "__builtins__") == NULL) { if (PyDict_SetItemString(globals, "__builtins__", PyEval_GetBuiltins()) != 0) @@ -799,8 +873,12 @@ } /* Update len. 
*/ - curlen = PyObject_Size(curseq); + curlen = _PyObject_LengthCue(curseq); if (curlen < 0) { + if (!PyErr_ExceptionMatches(PyExc_TypeError) && + !PyErr_ExceptionMatches(PyExc_AttributeError)) { + goto Fail_2; + } PyErr_Clear(); curlen = 8; /* arbitrary */ } @@ -1127,11 +1205,11 @@ if (kwds != NULL && PyDict_Check(kwds) && PyDict_Size(kwds)) { keyfunc = PyDict_GetItemString(kwds, "key"); if (PyDict_Size(kwds)!=1 || keyfunc == NULL) { - PyErr_Format(PyExc_TypeError, + PyErr_Format(PyExc_TypeError, "%s() got an unexpected keyword argument", name); return NULL; } - } + } it = PyObject_GetIter(v); if (it == NULL) @@ -1830,11 +1908,9 @@ static char *kwlist[] = {"iterable", "cmp", "key", "reverse", 0}; long reverse; - if (args != NULL) { - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOi:sorted", - kwlist, &seq, &compare, &keyfunc, &reverse)) - return NULL; - } + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOi:sorted", + kwlist, &seq, &compare, &keyfunc, &reverse)) + return NULL; newlist = PySequence_List(seq); if (newlist == NULL) @@ -1845,7 +1921,7 @@ Py_DECREF(newlist); return NULL; } - + newargs = PyTuple_GetSlice(args, 1, 4); if (newargs == NULL) { Py_DECREF(newlist); @@ -2032,8 +2108,12 @@ len = -1; /* unknown */ for (i = 0; i < itemsize; ++i) { PyObject *item = PyTuple_GET_ITEM(args, i); - int thislen = PyObject_Size(item); + int thislen = _PyObject_LengthCue(item); if (thislen < 0) { + if (!PyErr_ExceptionMatches(PyExc_TypeError) && + !PyErr_ExceptionMatches(PyExc_AttributeError)) { + return NULL; + } PyErr_Clear(); len = -1; break; @@ -2125,6 +2205,8 @@ static PyMethodDef builtin_methods[] = { {"__import__", builtin___import__, METH_VARARGS, import_doc}, {"abs", builtin_abs, METH_O, abs_doc}, + {"all", builtin_all, METH_O, all_doc}, + {"any", builtin_any, METH_O, any_doc}, {"apply", builtin_apply, METH_VARARGS, apply_doc}, {"callable", builtin_callable, METH_O, callable_doc}, {"chr", builtin_chr, METH_VARARGS, chr_doc}, @@ -2471,21 +2553,21 @@ if (ok) { 
int reslen; if (!PyUnicode_Check(item)) { - PyErr_SetString(PyExc_TypeError, + PyErr_SetString(PyExc_TypeError, "can't filter unicode to unicode:" " __getitem__ returned different type"); Py_DECREF(item); goto Fail_1; } reslen = PyUnicode_GET_SIZE(item); - if (reslen == 1) + if (reslen == 1) PyUnicode_AS_UNICODE(result)[j++] = PyUnicode_AS_UNICODE(item)[0]; else { /* do we need more space? */ int need = j + reslen + len - i - 1; if (need > outlen) { - /* overallocate, + /* overallocate, to avoid reallocations */ if (need < 2 * outlen) need = 2 * outlen; Index: ceval.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ceval.c,v retrieving revision 2.314.2.6 retrieving revision 2.314.2.7 diff -u -d -r2.314.2.6 -r2.314.2.7 --- ceval.c 7 Jan 2005 07:04:36 -0000 2.314.2.6 +++ ceval.c 16 Oct 2005 05:24:05 -0000 2.314.2.7 @@ -17,8 +17,10 @@ #include #ifndef WITH_TSC -#define rdtscll(var) -#else /*WITH_TSC defined*/ + +#define READ_TIMESTAMP(var) + +#else typedef unsigned long long uint64; @@ -26,7 +28,7 @@ section should work for GCC on any PowerPC platform, irrespective of OS. POWER? 
Who knows :-) */ -#define rdtscll(var) ppc_getcounter(&var) +#define READ_TIMESTAMP(var) ppc_getcounter(&var) static void ppc_getcounter(uint64 *v) @@ -45,9 +47,10 @@ ((long*)(v))[1] = tb; } -#else /* this section is for linux/x86 */ +#else /* this is for linux/x86 (and probably any other GCC/x86 combo) */ -#include +#define READ_TIMESTAMP(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) #endif @@ -99,7 +102,7 @@ static int call_trace(Py_tracefunc, PyObject *, PyFrameObject *, int, PyObject *); static void call_trace_protected(Py_tracefunc, PyObject *, - PyFrameObject *, int); + PyFrameObject *, int, PyObject *); static void call_exc_trace(Py_tracefunc, PyObject *, PyFrameObject *); static int maybe_call_line_trace(Py_tracefunc, PyObject *, PyFrameObject *, int *, int *, int *); @@ -414,8 +417,11 @@ /* The interpreter's recursion limit */ -static int recursion_limit = 1000; -int _Py_CheckRecursionLimit = 1000; +#ifndef Py_DEFAULT_RECURSION_LIMIT +#define Py_DEFAULT_RECURSION_LIMIT 1000 +#endif +static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT; +int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT; int Py_GetRecursionLimit(void) @@ -493,7 +499,14 @@ /* Interpreter main loop */ PyObject * -PyEval_EvalFrame(PyFrameObject *f) +PyEval_EvalFrame(PyFrameObject *f) { + /* This is for backward compatibility with extension modules that + used this API; core interpreter code should call PyEval_EvalFrameEx() */ + return PyEval_EvalFrameEx(f, 0); +} + +PyObject * +PyEval_EvalFrameEx(PyFrameObject *f, int throw) { #ifdef DXPAIRS int lastopcode = 0; @@ -575,10 +588,10 @@ uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0; int ticked = 0; - rdtscll(inst0); - rdtscll(inst1); - rdtscll(loop0); - rdtscll(loop1); + READ_TIMESTAMP(inst0); + READ_TIMESTAMP(inst1); + READ_TIMESTAMP(loop0); + READ_TIMESTAMP(loop1); /* shut up the compiler */ opcode = 0; @@ -715,7 +728,7 @@ consts = co->co_consts; fastlocals = f->f_localsplus; freevars = f->f_localsplus + f->f_nlocals; - 
first_instr = PyString_AS_STRING(co->co_code); + first_instr = (unsigned char*) PyString_AS_STRING(co->co_code); /* An explanation is in order for the next line. f->f_lasti now refers to the index of the last instruction @@ -741,6 +754,11 @@ x = Py_None; /* Not a reference, just anything non-NULL */ w = NULL; + if (throw) { /* support for generator.throw() */ + why = WHY_EXCEPTION; + goto on_error; + } + for (;;) { #ifdef WITH_TSC if (inst1 == 0) { @@ -748,7 +766,7 @@ or a continue, preventing inst1 from being set on the way out of the loop. */ - rdtscll(inst1); + READ_TIMESTAMP(inst1); loop1 = inst1; } dump_tsc(opcode, ticked, inst0, inst1, loop0, loop1, @@ -757,7 +775,7 @@ inst1 = 0; intr0 = 0; intr1 = 0; - rdtscll(loop0); + READ_TIMESTAMP(loop0); #endif assert(stack_pointer >= f->f_valuestack); /* else underflow */ assert(STACK_LEVEL() <= f->f_stacksize); /* else overflow */ @@ -883,7 +901,7 @@ #endif /* Main switch on opcode */ - rdtscll(inst0); + READ_TIMESTAMP(inst0); switch (opcode) { @@ -1642,9 +1660,9 @@ v = SECOND(); u = THIRD(); STACKADJ(-3); - rdtscll(intr0); + READ_TIMESTAMP(intr0); err = exec_statement(f, u, v, w); - rdtscll(intr1); + READ_TIMESTAMP(intr1); Py_DECREF(u); Py_DECREF(v); Py_DECREF(w); @@ -2020,9 +2038,9 @@ x = NULL; break; } - rdtscll(intr0); + READ_TIMESTAMP(intr0); x = PyEval_CallObject(x, w); - rdtscll(intr1); + READ_TIMESTAMP(intr1); Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; @@ -2036,9 +2054,9 @@ "no locals found during 'import *'"); break; } - rdtscll(intr0); + READ_TIMESTAMP(intr0); err = import_all_from(x, v); - rdtscll(intr1); + READ_TIMESTAMP(intr1); PyFrame_LocalsToFast(f, 0); Py_DECREF(v); if (err == 0) continue; @@ -2047,9 +2065,9 @@ case IMPORT_FROM: w = GETITEM(names, oparg); v = TOP(); - rdtscll(intr0); + READ_TIMESTAMP(intr0); x = import_from(v, w); - rdtscll(intr1); + READ_TIMESTAMP(intr1); PUSH(x); if (x != NULL) continue; break; @@ -2203,9 +2221,9 @@ } else Py_INCREF(func); sp = stack_pointer; - 
rdtscll(intr0); + READ_TIMESTAMP(intr0); x = ext_do_call(func, &sp, flags, na, nk); - rdtscll(intr1); + READ_TIMESTAMP(intr1); stack_pointer = sp; Py_DECREF(func); @@ -2306,7 +2324,7 @@ on_error: - rdtscll(inst1); + READ_TIMESTAMP(inst1); /* Quickly continue if no error occurred */ @@ -2319,7 +2337,7 @@ "XXX undetected error\n"); else { #endif - rdtscll(loop1); + READ_TIMESTAMP(loop1); continue; /* Normal, fast path */ #ifdef CHECKEXC } @@ -2438,7 +2456,7 @@ if (why != WHY_NOT) break; - rdtscll(loop1); + READ_TIMESTAMP(loop1); } /* main loop */ @@ -2454,21 +2472,27 @@ fast_yield: if (tstate->use_tracing) { - if (tstate->c_tracefunc - && (why == WHY_RETURN || why == WHY_YIELD)) { - if (call_trace(tstate->c_tracefunc, - tstate->c_traceobj, f, - PyTrace_RETURN, retval)) { - Py_XDECREF(retval); - retval = NULL; - why = WHY_EXCEPTION; + if (tstate->c_tracefunc) { + if (why == WHY_RETURN || why == WHY_YIELD) { + if (call_trace(tstate->c_tracefunc, + tstate->c_traceobj, f, + PyTrace_RETURN, retval)) { + Py_XDECREF(retval); + retval = NULL; + why = WHY_EXCEPTION; + } + } + else if (why == WHY_EXCEPTION) { + call_trace_protected(tstate->c_tracefunc, + tstate->c_traceobj, f, + PyTrace_RETURN, NULL); } } if (tstate->c_profilefunc) { if (why == WHY_EXCEPTION) call_trace_protected(tstate->c_profilefunc, tstate->c_profileobj, f, - PyTrace_RETURN); + PyTrace_RETURN, NULL); else if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, f, PyTrace_RETURN, retval)) { @@ -2491,7 +2515,7 @@ /* this is gonna seem *real weird*, but if you put some other code between PyEval_EvalFrame() and PyEval_EvalCodeEx() you will need to adjust - the test in the if statement in Misc/gdbinit:ppystack */ + the test in the if statement in Misc/gdbinit:pystack* */ PyObject * PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, @@ -2716,7 +2740,7 @@ return PyGen_New(f); } - retval = PyEval_EvalFrame(f); + retval = PyEval_EvalFrameEx(f,0); fail: /* Jump here from prelude on failure 
*/ @@ -3073,12 +3097,12 @@ static void call_trace_protected(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, - int what) + int what, PyObject *arg) { PyObject *type, *value, *traceback; int err; PyErr_Fetch(&type, &value, &traceback); - err = call_trace(func, obj, frame, what, NULL); + err = call_trace(func, obj, frame, what, arg); if (err == 0) PyErr_Restore(type, value, traceback); else { @@ -3267,10 +3291,12 @@ Py_XINCREF(arg); tstate->c_profilefunc = NULL; tstate->c_profileobj = NULL; + /* Must make sure that tracing is not ignored if 'temp' is freed */ tstate->use_tracing = tstate->c_tracefunc != NULL; Py_XDECREF(temp); tstate->c_profilefunc = func; tstate->c_profileobj = arg; + /* Flag that tracing or profiling is turned on */ tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); } @@ -3282,10 +3308,12 @@ Py_XINCREF(arg); tstate->c_tracefunc = NULL; tstate->c_traceobj = NULL; + /* Must make sure that profiling is not ignored if 'temp' is freed */ tstate->use_tracing = tstate->c_profilefunc != NULL; Py_XDECREF(temp); tstate->c_tracefunc = func; tstate->c_traceobj = arg; + /* Flag that tracing or profiling is turned on */ tstate->use_tracing = ((func != NULL) || (tstate->c_profilefunc != NULL)); } @@ -3464,31 +3492,36 @@ nargs); } -#define C_TRACE(call) \ +#define C_TRACE(x, call) \ if (tstate->use_tracing && tstate->c_profilefunc) { \ if (call_trace(tstate->c_profilefunc, \ tstate->c_profileobj, \ tstate->frame, PyTrace_C_CALL, \ - func)) \ - { return NULL; } \ - call; \ - if (tstate->c_profilefunc != NULL) { \ - if (x == NULL) { \ - if (call_trace (tstate->c_profilefunc, \ - tstate->c_profileobj, \ - tstate->frame, PyTrace_C_EXCEPTION, \ - func)) \ - { return NULL; } \ - } else { \ - if (call_trace(tstate->c_profilefunc, \ - tstate->c_profileobj, \ - tstate->frame, PyTrace_C_RETURN, \ - func)) \ - { return NULL; } \ + func)) { \ + x = NULL; \ + } \ + else { \ + x = call; \ + if (tstate->c_profilefunc != NULL) { \ + if (x == NULL) { \ + 
call_trace_protected(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate->frame, PyTrace_C_EXCEPTION, \ + func); \ + /* XXX should pass (type, value, tb) */ \ + } else { \ + if (call_trace(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate->frame, PyTrace_C_RETURN, \ + func)) { \ + Py_DECREF(x); \ + x = NULL; \ + } \ + } \ } \ } \ } else { \ - call; \ + x = call; \ } static PyObject * @@ -3517,11 +3550,11 @@ PyCFunction meth = PyCFunction_GET_FUNCTION(func); PyObject *self = PyCFunction_GET_SELF(func); if (flags & METH_NOARGS && na == 0) { - C_TRACE(x=(*meth)(self,NULL)); + C_TRACE(x, (*meth)(self,NULL)); } else if (flags & METH_O && na == 1) { PyObject *arg = EXT_POP(*pp_stack); - C_TRACE(x=(*meth)(self,arg)); + C_TRACE(x, (*meth)(self,arg)); Py_DECREF(arg); } else { @@ -3532,9 +3565,9 @@ else { PyObject *callargs; callargs = load_args(pp_stack, na); - rdtscll(*pintr0); - C_TRACE(x=PyCFunction_Call(func,callargs,NULL)); - rdtscll(*pintr1); + READ_TIMESTAMP(*pintr0); + C_TRACE(x, PyCFunction_Call(func,callargs,NULL)); + READ_TIMESTAMP(*pintr1); Py_XDECREF(callargs); } } else { @@ -3552,12 +3585,12 @@ n++; } else Py_INCREF(func); - rdtscll(*pintr0); + READ_TIMESTAMP(*pintr0); if (PyFunction_Check(func)) x = fast_function(func, pp_stack, n, na, nk); else x = do_call(func, pp_stack, na, nk); - rdtscll(*pintr1); + READ_TIMESTAMP(*pintr1); Py_DECREF(func); } @@ -3615,7 +3648,7 @@ Py_INCREF(*stack); fastlocals[i] = *stack++; } - retval = PyEval_EvalFrame(f); + retval = PyEval_EvalFrameEx(f,0); assert(tstate != NULL); ++tstate->recursion_depth; Py_DECREF(f); Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.247.2.3 retrieving revision 2.247.2.4 diff -u -d -r2.247.2.3 -r2.247.2.4 --- compile.c 13 Apr 2005 19:44:38 -0000 2.247.2.3 +++ compile.c 16 Oct 2005 05:24:05 -0000 2.247.2.4 @@ -325,6 +325,598 @@ return 0; } +/* Begin: Peephole 
optimizations ----------------------------------------- */ + +#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) +#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) +#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 +#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) +#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1]) + +/* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n + with LOAD_CONST (c1, c2, ... cn). + The consts table must still be in list form so that the + new constant (c1, c2, ... cn) can be appended. + Called with codestr pointing to the first LOAD_CONST. + Bails out with no change if one or more of the LOAD_CONSTs is missing. + Also works for BUILD_LIST when followed by an "in" or "not in" test. +*/ +static int +tuple_of_constants(unsigned char *codestr, int n, PyObject *consts) +{ + PyObject *newconst, *constant; + int i, arg, len_consts; + + /* Pre-conditions */ + assert(PyList_CheckExact(consts)); + assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST); + assert(GETARG(codestr, (n*3)) == n); + for (i=0 ; i 20) { + Py_DECREF(newconst); + return 0; + } + + /* Append folded constant into consts table */ + len_consts = PyList_GET_SIZE(consts); + if (PyList_Append(consts, newconst)) { + Py_DECREF(newconst); + return 0; + } + Py_DECREF(newconst); + + /* Write NOP NOP NOP NOP LOAD_CONST newconst */ + memset(codestr, NOP, 4); + codestr[4] = LOAD_CONST; + SETARG(codestr, 4, len_consts); + return 1; +} + +static int +fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts) +{ + PyObject *newconst=NULL, *v; + int len_consts, opcode; + + /* Pre-conditions */ + assert(PyList_CheckExact(consts)); + assert(codestr[0] == LOAD_CONST); + + /* Create new constant */ + v = PyList_GET_ITEM(consts, GETARG(codestr, 
0)); + opcode = codestr[3]; + switch (opcode) { + case UNARY_NEGATIVE: + /* Preserve the sign of -0.0 */ + if (PyObject_IsTrue(v) == 1) + newconst = PyNumber_Negative(v); + break; + case UNARY_CONVERT: + newconst = PyObject_Repr(v); + break; + case UNARY_INVERT: + newconst = PyNumber_Invert(v); + break; + default: + /* Called with an unknown opcode */ + assert(0); + return 0; + } + if (newconst == NULL) { + PyErr_Clear(); + return 0; + } + + /* Append folded constant into consts table */ + len_consts = PyList_GET_SIZE(consts); + if (PyList_Append(consts, newconst)) { + Py_DECREF(newconst); + return 0; + } + Py_DECREF(newconst); + + /* Write NOP LOAD_CONST newconst */ + codestr[0] = NOP; + codestr[1] = LOAD_CONST; + SETARG(codestr, 1, len_consts); + return 1; +} + +static unsigned int * +markblocks(unsigned char *code, int len) +{ + unsigned int *blocks = PyMem_Malloc(len*sizeof(int)); + int i,j, opcode, blockcnt = 0; + + if (blocks == NULL) + return NULL; + memset(blocks, 0, len*sizeof(int)); + + /* Mark labels in the first pass */ + for (i=0 ; i= 255. + + Optimizations are restricted to simple transformations occuring within a + single basic block. All transformations keep the code size the same or + smaller. For those that reduce size, the gaps are initially filled with + NOPs. Later those NOPs are removed and the jump addresses retargeted in + a single pass. Line numbering is adjusted accordingly. 
*/ + +static PyObject * +optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *lineno_obj) +{ + int i, j, codelen, nops, h, adj; + int tgt, tgttgt, opcode; + unsigned char *codestr = NULL; + unsigned char *lineno; + int *addrmap = NULL; + int new_line, cum_orig_line, last_line, tabsiz; + int cumlc=0, lastlc=0; /* Count runs of consecutive LOAD_CONST codes */ + unsigned int *blocks = NULL; + char *name; + + /* Bail out if an exception is set */ + if (PyErr_Occurred()) + goto exitUnchanged; + + /* Bypass optimization when the lineno table is too complex */ + assert(PyString_Check(lineno_obj)); + lineno = (unsigned char*)PyString_AS_STRING(lineno_obj); + tabsiz = PyString_GET_SIZE(lineno_obj); + if (memchr(lineno, 255, tabsiz) != NULL) + goto exitUnchanged; + + /* Avoid situations where jump retargeting could overflow */ + assert(PyString_Check(code)); + codelen = PyString_Size(code); + if (codelen > 32700) + goto exitUnchanged; + + /* Make a modifiable copy of the code string */ + codestr = PyMem_Malloc(codelen); + if (codestr == NULL) + goto exitUnchanged; + codestr = memcpy(codestr, PyString_AS_STRING(code), codelen); + + /* Verify that RETURN_VALUE terminates the codestring. This allows + the various transformation patterns to look ahead several + instructions without additional checks to make sure they are not + looking beyond the end of the code string. 
+ */ + if (codestr[codelen-1] != RETURN_VALUE) + goto exitUnchanged; + + /* Mapping to new jump targets after NOPs are removed */ + addrmap = PyMem_Malloc(codelen * sizeof(int)); + if (addrmap == NULL) + goto exitUnchanged; + + blocks = markblocks(codestr, codelen); + if (blocks == NULL) + goto exitUnchanged; + assert(PyList_Check(consts)); + + for (i=0 ; i a is not b + not a in b --> a not in b + not a is not b --> a is b + not a not in b --> a in b + */ + case COMPARE_OP: + j = GETARG(codestr, i); + if (j < 6 || j > 9 || + codestr[i+3] != UNARY_NOT || + !ISBASICBLOCK(blocks,i,4)) + continue; + SETARG(codestr, i, (j^1)); + codestr[i+3] = NOP; + break; + + /* Replace LOAD_GLOBAL/LOAD_NAME None with LOAD_CONST None */ + case LOAD_NAME: + case LOAD_GLOBAL: + j = GETARG(codestr, i); + name = PyString_AsString(PyTuple_GET_ITEM(names, j)); + if (name == NULL || strcmp(name, "None") != 0) + continue; + for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) { + if (PyList_GET_ITEM(consts, j) == Py_None) { + codestr[i] = LOAD_CONST; + SETARG(codestr, i, j); + cumlc = lastlc + 1; + break; + } + } + break; + + /* Skip over LOAD_CONST trueconst JUMP_IF_FALSE xx POP_TOP */ + case LOAD_CONST: + cumlc = lastlc + 1; + j = GETARG(codestr, i); + if (codestr[i+3] != JUMP_IF_FALSE || + codestr[i+6] != POP_TOP || + !ISBASICBLOCK(blocks,i,7) || + !PyObject_IsTrue(PyList_GET_ITEM(consts, j))) + continue; + memset(codestr+i, NOP, 7); + cumlc = 0; + break; + + /* Try to fold tuples of constants (includes a case for lists + which are only used for "in" and "not in" tests). + Skip over BUILD_SEQN 1 UNPACK_SEQN 1. + Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2. + Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. 
*/ + case BUILD_TUPLE: + case BUILD_LIST: + j = GETARG(codestr, i); + h = i - 3 * j; + if (h >= 0 && + j <= lastlc && + ((opcode == BUILD_TUPLE && + ISBASICBLOCK(blocks, h, 3*(j+1))) || + (opcode == BUILD_LIST && + codestr[i+3]==COMPARE_OP && + ISBASICBLOCK(blocks, h, 3*(j+2)) && + (GETARG(codestr,i+3)==6 || + GETARG(codestr,i+3)==7))) && + tuple_of_constants(&codestr[h], j, consts)) { + assert(codestr[i] == LOAD_CONST); + cumlc = 1; + break; + } + if (codestr[i+3] != UNPACK_SEQUENCE || + !ISBASICBLOCK(blocks,i,6) || + j != GETARG(codestr, i+3)) + continue; + if (j == 1) { + memset(codestr+i, NOP, 6); + } else if (j == 2) { + codestr[i] = ROT_TWO; + memset(codestr+i+1, NOP, 5); + } else if (j == 3) { + codestr[i] = ROT_THREE; + codestr[i+1] = ROT_TWO; + memset(codestr+i+2, NOP, 4); + } + break; + + /* Fold binary ops on constants. + LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */ + case BINARY_POWER: + case BINARY_MULTIPLY: + case BINARY_TRUE_DIVIDE: + case BINARY_FLOOR_DIVIDE: + case BINARY_MODULO: + case BINARY_ADD: + case BINARY_SUBTRACT: + case BINARY_SUBSCR: + case BINARY_LSHIFT: + case BINARY_RSHIFT: + case BINARY_AND: + case BINARY_XOR: + case BINARY_OR: + if (lastlc >= 2 && + ISBASICBLOCK(blocks, i-6, 7) && + fold_binops_on_constants(&codestr[i-6], consts)) { + i -= 2; + assert(codestr[i] == LOAD_CONST); + cumlc = 1; + } + break; + + /* Fold unary ops on constants. + LOAD_CONST c1 UNARY_OP --> LOAD_CONST unary_op(c) */ + case UNARY_NEGATIVE: + case UNARY_CONVERT: + case UNARY_INVERT: + if (lastlc >= 1 && + ISBASICBLOCK(blocks, i-3, 4) && + fold_unaryops_on_constants(&codestr[i-3], consts)) { + i -= 2; + assert(codestr[i] == LOAD_CONST); + cumlc = 1; + } + break; + + /* Simplify conditional jump to conditional jump where the + result of the first test implies the success of a similar + test or the failure of the opposite test. 
+ Arises in code like: + "if a and b:" + "if a or b:" + "a and b or c" + "(a and b) and c" + x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z + x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3 + where y+3 is the instruction following the second test. + */ + case JUMP_IF_FALSE: + case JUMP_IF_TRUE: + tgt = GETJUMPTGT(codestr, i); + j = codestr[tgt]; + if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) { + if (j == opcode) { + tgttgt = GETJUMPTGT(codestr, tgt) - i - 3; + SETARG(codestr, i, tgttgt); + } else { + tgt -= i; + SETARG(codestr, i, tgt); + } + break; + } + /* Intentional fallthrough */ + + /* Replace jumps to unconditional jumps */ + case FOR_ITER: + case JUMP_FORWARD: + case JUMP_ABSOLUTE: + case CONTINUE_LOOP: + case SETUP_LOOP: + case SETUP_EXCEPT: + case SETUP_FINALLY: + tgt = GETJUMPTGT(codestr, i); + if (!UNCONDITIONAL_JUMP(codestr[tgt])) + continue; + tgttgt = GETJUMPTGT(codestr, tgt); + if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */ + opcode = JUMP_ABSOLUTE; + if (!ABSOLUTE_JUMP(opcode)) + tgttgt -= i + 3; /* Calc relative jump addr */ + if (tgttgt < 0) /* No backward relative jumps */ + continue; + codestr[i] = opcode; + SETARG(codestr, i, tgttgt); + break; + + case EXTENDED_ARG: + goto exitUnchanged; + + /* Replace RETURN LOAD_CONST None RETURN with just RETURN */ + case RETURN_VALUE: + if (i+4 >= codelen || + codestr[i+4] != RETURN_VALUE || + !ISBASICBLOCK(blocks,i,5)) + continue; + memset(codestr+i+1, NOP, 4); + break; + } + } + + /* Fixup linenotab */ + for (i=0, nops=0 ; ic_begin; + + REQ(n, gen_for); + /* gen_for: for v in test [gen_iter] */ + + com_addfwref(c, SETUP_LOOP, &break_anchor); + block_push(c, SETUP_LOOP); + + if (is_outmost) { + com_addop_varname(c, VAR_LOAD, "[outmost-iterable]"); + com_push(c, 1); + } + else { + com_node(c, CHILD(n, 3)); + com_addbyte(c, GET_ITER); + } + + c->c_begin = c->c_nexti; + com_set_lineno(c, c->c_last_line); + com_addfwref(c, FOR_ITER, &anchor); + com_push(c, 1); + com_assign(c, 
CHILD(n, 1), OP_ASSIGN, NULL); + + if (NCH(n) == 5) + com_gen_iter(c, CHILD(n, 4), t); + else { + com_test(c, t); + com_addbyte(c, YIELD_VALUE); + com_addbyte(c, POP_TOP); + com_pop(c, 1); + } + + com_addoparg(c, JUMP_ABSOLUTE, c->c_begin); + c->c_begin = save_begin; + + com_backpatch(c, anchor); + com_pop(c, 1); /* FOR_ITER has popped this */ + com_addbyte(c, POP_BLOCK); + block_pop(c, SETUP_LOOP); + com_backpatch(c, break_anchor); +} + +static void com_list_if(struct compiling *c, node *n, node *e, char *t) { int anchor = 0; @@ -1413,6 +2052,33 @@ } static void +com_gen_if(struct compiling *c, node *n, node *t) +{ + /* gen_if: 'if' test [gen_iter] */ + int anchor = 0; + int a=0; + + com_node(c, CHILD(n, 1)); + com_addfwref(c, JUMP_IF_FALSE, &a); + com_addbyte(c, POP_TOP); + com_pop(c, 1); + + if (NCH(n) == 3) + com_gen_iter(c, CHILD(n, 2), t); + else { + com_test(c, t); + com_addbyte(c, YIELD_VALUE); + com_addbyte(c, POP_TOP); + com_pop(c, 1); + } + com_addfwref(c, JUMP_FORWARD, &anchor); + com_backpatch(c, a); + /* We jump here with an extra entry which we now pop */ + com_addbyte(c, POP_TOP); + com_backpatch(c, anchor); +} + +static void com_list_iter(struct compiling *c, node *p, /* parent of list_iter node */ node *e, /* element expression node */ @@ -1495,6 +2161,10 @@ } } + +/* forward reference */ +static void com_yield_expr(struct compiling *c, node *n); + static void com_atom(struct compiling *c, node *n) { @@ -1510,7 +2180,10 @@ com_push(c, 1); } else - com_node(c, CHILD(n, 1)); + if (TYPE(CHILD(n, 1)) == yield_expr) + com_yield_expr(c, CHILD(n, 1)); + else + com_testlist_gexp(c, CHILD(n, 1)); break; case LSQB: /* '[' [listmaker] ']' */ if (TYPE(CHILD(n, 1)) == RSQB) { @@ -2562,7 +3235,11 @@ } n = CHILD(n, 0); break; - + case yield_expr: + com_error(c, PyExc_SyntaxError, + "assignment to yield expression not possible"); + return; + case test: case and_test: case not_test: @@ -2619,7 +3296,7 @@ } if (assigning > OP_APPLY) { com_error(c, 
PyExc_SyntaxError, - "augmented assign to tuple not possible"); + "augmented assign to tuple literal, yield, or generator expression not possible"); return; } break; @@ -2855,27 +3532,41 @@ } static void -com_yield_stmt(struct compiling *c, node *n) +com_yield_expr(struct compiling *c, node *n) { - int i; - REQ(n, yield_stmt); /* 'yield' testlist */ + REQ(n, yield_expr); /* 'yield' testlist */ if (!c->c_infunction) { com_error(c, PyExc_SyntaxError, "'yield' outside function"); } - for (i = 0; i < c->c_nblocks; ++i) { + /* for (i = 0; i < c->c_nblocks; ++i) { if (c->c_block[i] == SETUP_FINALLY) { com_error(c, PyExc_SyntaxError, "'yield' not allowed in a 'try' block " "with a 'finally' clause"); return; } + } */ + + if (NCH(n) < 2) { + com_addoparg(c, LOAD_CONST, com_addconst(c, Py_None)); + com_push(c, 1); } - com_node(c, CHILD(n, 1)); + else + com_node(c, CHILD(n, 1)); com_addbyte(c, YIELD_VALUE); +} + +static void +com_yield_stmt(struct compiling *c, node *n) +{ + REQ(n, yield_stmt); /* yield_expr */ + com_node(c, CHILD(n, 0)); + com_addbyte(c, POP_TOP); com_pop(c, 1); } + static void com_raise_stmt(struct compiling *c, node *n) { @@ -3639,7 +4330,7 @@ char *name; REQ(n, classdef); - /* classdef: class NAME ['(' testlist ')'] ':' suite */ + /* classdef: class NAME ['(' [testlist] ')'] ':' suite */ if ((v = PyString_InternFromString(STR(CHILD(n, 1)))) == NULL) { c->c_errors++; return; @@ -3650,7 +4341,8 @@ com_push(c, 1); Py_DECREF(v); /* Push the tuple of base classes on the stack */ - if (TYPE(CHILD(n, 2)) != LPAR) { + if (TYPE(CHILD(n, 2)) != LPAR || + TYPE(CHILD(n, 3)) == RPAR) { com_addoparg(c, BUILD_TUPLE, 0); com_push(c, 1); } @@ -3782,6 +4474,10 @@ /* Expression nodes */ + case yield_expr: + com_yield_expr(c, n); + break; + case testlist: case testlist1: case testlist_safe: @@ -4029,6 +4725,25 @@ } static void +compile_generator_expression(struct compiling *c, node *n) +{ + /* testlist_gexp: test gen_for */ + /* argument: test gen_for */ + REQ(CHILD(n, 0), 
test); + REQ(CHILD(n, 1), gen_for); + + c->c_name = ""; + c->c_infunction = 1; + com_gen_for(c, CHILD(n, 1), CHILD(n, 0), 1); + c->c_infunction = 0; + + com_addoparg(c, LOAD_CONST, com_addconst(c, Py_None)); + com_push(c, 1); + com_addbyte(c, RETURN_VALUE); + com_pop(c, 1); +} + +static void compile_node(struct compiling *c, node *n) { com_addoparg(c, SET_LINENO, n->n_lineno); @@ -4979,7 +5694,7 @@ #define symtable_add_use(ST, NAME) symtable_add_def((ST), (NAME), USE) -/* Look for a yield stmt under n. Return 1 if found, else 0. +/* Look for a yield stmt or expr under n. Return 1 if found, else 0. This hack is used to look inside "if 0:" blocks (which are normally ignored) in case those are the only places a yield occurs (so that this function is a generator). */ @@ -5001,7 +5716,8 @@ return 0; case yield_stmt: - return 1; + case yield_expr: + return GENERATOR; default: if (look_for_yield(kid)) @@ -5108,8 +5824,10 @@ case del_stmt: symtable_assign(st, CHILD(n, 1), 0); break; - case yield_stmt: + case yield_expr: st->st_cur->ste_generator = 1; + if (NCH(n)==1) + break; n = CHILD(n, 1); goto loop; case expr_stmt: @@ -5170,9 +5888,15 @@ } /* fall through */ case atom: - if (TYPE(n) == atom && TYPE(CHILD(n, 0)) == NAME) { - symtable_add_use(st, STR(CHILD(n, 0))); - break; + if (TYPE(n) == atom) { + if (TYPE(CHILD(n, 0)) == NAME) { + symtable_add_use(st, STR(CHILD(n, 0))); + break; + } + else if (TYPE(CHILD(n,0)) == LPAR) { + n = CHILD(n,1); + goto loop; + } } /* fall through */ default: @@ -5492,6 +6216,15 @@ symtable_add_def(st, STR(tmp), DEF_LOCAL | def_flag); } return; + + case yield_expr: + st->st_cur->ste_generator = 1; + if (NCH(n)==2) { + n = CHILD(n, 1); + goto loop; + } + return; + case dotted_as_name: if (NCH(n) == 3) symtable_add_def(st, STR(CHILD(n, 2)), Index: errors.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/errors.c,v retrieving revision 2.70.2.3 retrieving revision 2.70.2.4 diff 
-u -d -r2.70.2.3 -r2.70.2.4 --- errors.c 7 Jan 2005 07:04:38 -0000 2.70.2.3 +++ errors.c 16 Oct 2005 05:24:05 -0000 2.70.2.4 @@ -535,10 +535,6 @@ } if (base == NULL) base = PyExc_Exception; - if (!PyClass_Check(base)) { - /* Must be using string-based standard exceptions (-X) */ - return PyString_FromString(name); - } if (dict == NULL) { dict = mydict = PyDict_New(); if (dict == NULL) Index: exceptions.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/exceptions.c,v retrieving revision 1.32.2.2 retrieving revision 1.32.2.3 diff -u -d -r1.32.2.2 -r1.32.2.3 --- exceptions.c 7 Jan 2005 07:04:38 -0000 1.32.2.2 +++ exceptions.c 16 Oct 2005 05:24:05 -0000 1.32.2.3 @@ -57,6 +57,7 @@ |\n\ +-- SystemExit\n\ +-- StopIteration\n\ + +-- GeneratorExit\n\ +-- StandardError\n\ | |\n\ | +-- KeyboardInterrupt\n\ @@ -394,6 +395,7 @@ PyDoc_STRVAR(TypeError__doc__, "Inappropriate argument type."); PyDoc_STRVAR(StopIteration__doc__, "Signal the end from iterator.next()."); +PyDoc_STRVAR(GeneratorExit__doc__, "Request that a generator exit."); @@ -1583,6 +1585,7 @@ PyObject *PyExc_Exception; PyObject *PyExc_StopIteration; +PyObject *PyExc_GeneratorExit; PyObject *PyExc_StandardError; PyObject *PyExc_ArithmeticError; PyObject *PyExc_LookupError; @@ -1657,6 +1660,8 @@ {"Exception", &PyExc_Exception}, {"StopIteration", &PyExc_StopIteration, &PyExc_Exception, StopIteration__doc__}, + {"GeneratorExit", &PyExc_GeneratorExit, &PyExc_Exception, + GeneratorExit__doc__}, {"StandardError", &PyExc_StandardError, &PyExc_Exception, StandardError__doc__}, {"TypeError", &PyExc_TypeError, 0, TypeError__doc__}, Index: future.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/future.c,v retrieving revision 2.12.2.7 retrieving revision 2.12.2.8 diff -u -d -r2.12.2.7 -r2.12.2.8 --- future.c 21 Apr 2004 14:41:49 -0000 2.12.2.7 +++ future.c 16 Oct 2005 05:24:05 -0000 
2.12.2.8 @@ -130,4 +130,3 @@ } return ff; } - Index: getargs.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/getargs.c,v retrieving revision 2.92.2.2 retrieving revision 2.92.2.3 diff -u -d -r2.92.2.2 -r2.92.2.3 --- getargs.c 7 Jan 2005 07:04:39 -0000 2.92.2.2 +++ getargs.c 16 Oct 2005 05:24:05 -0000 2.92.2.3 @@ -453,7 +453,9 @@ or a string with a message describing the failure. The message is formatted as "must be , not ". When failing, an exception may or may not have been raised. - Don't call if a tuple is expected. + Don't call if a tuple is expected. + + When you add new format codes, please don't forget poor skipitem() below. */ static char * @@ -1299,7 +1301,8 @@ /* make sure we got an acceptable number of arguments; the message is a little confusing with keywords since keyword arguments which are supplied, but don't match the required arguments - are not included in the "%d given" part of the message */ + are not included in the "%d given" part of the message + XXX and this isn't a bug!? 
*/ if (len < min || max < len) { if (message == NULL) { PyOS_snprintf(msgbuf, sizeof(msgbuf), @@ -1405,83 +1408,52 @@ char c = *format++; switch (c) { - + + /* simple codes + * The individual types (second arg of va_arg) are irrelevant */ + case 'b': /* byte -- very short int */ case 'B': /* byte as bitfield */ - { - (void) va_arg(*p_va, char *); - break; - } - case 'h': /* short int */ - { - (void) va_arg(*p_va, short *); - break; - } - case 'H': /* short int as bitfield */ - { - (void) va_arg(*p_va, unsigned short *); - break; - } - case 'i': /* int */ - { - (void) va_arg(*p_va, int *); - break; - } - + case 'I': /* int sized bitfield */ case 'l': /* long int */ - { - (void) va_arg(*p_va, long *); - break; - } - + case 'k': /* long int sized bitfield */ #ifdef HAVE_LONG_LONG - case 'L': /* PY_LONG_LONG int */ - { - (void) va_arg(*p_va, PY_LONG_LONG *); - break; - } + case 'L': /* PY_LONG_LONG */ + case 'K': /* PY_LONG_LONG sized bitfield */ #endif - case 'f': /* float */ - { - (void) va_arg(*p_va, float *); - break; - } - case 'd': /* double */ - { - (void) va_arg(*p_va, double *); - break; - } - #ifndef WITHOUT_COMPLEX case 'D': /* complex double */ - { - (void) va_arg(*p_va, Py_complex *); - break; - } -#endif /* WITHOUT_COMPLEX */ - +#endif case 'c': /* char */ { - (void) va_arg(*p_va, char *); + (void) va_arg(*p_va, void *); break; } - case 's': /* string */ + /* string codes */ + + case 'e': /* string with encoding */ { - (void) va_arg(*p_va, char **); - if (*format == '#') { - (void) va_arg(*p_va, int *); - format++; - } - break; + (void) va_arg(*p_va, const char *); + if (!(*format == 's' || *format == 't')) + /* after 'e', only 's' and 't' is allowed */ + goto err; + format++; + /* explicit fallthrough to string cases */ } - case 'z': /* string */ + case 's': /* string */ + case 'z': /* string or None */ +#ifdef Py_USING_UNICODE + case 'u': /* unicode string */ +#endif + case 't': /* buffer, read-only */ + case 'w': /* buffer, read-write */ { (void) 
va_arg(*p_va, char **); if (*format == '#') { @@ -1490,8 +1462,13 @@ } break; } - + + /* object codes */ + case 'S': /* string object */ +#ifdef Py_USING_UNICODE + case 'U': /* unicode string object */ +#endif { (void) va_arg(*p_va, PyObject **); break; @@ -1527,9 +1504,13 @@ } default: +err: return "impossible"; } + + /* The "(...)" format code for tuples is not handled here because + * it is not allowed with keyword args. */ *p_format = format; return NULL; @@ -1594,3 +1575,29 @@ va_end(vargs); return 1; } + + +/* For type constructors that don't take keyword args + * + * Sets a TypeError and returns 0 if the kwds dict is + * not emtpy, returns 1 otherwise + */ +int +_PyArg_NoKeywords(char *funcname, PyObject *kw) +{ + if (kw == NULL) + return 1; + if (!PyDict_CheckExact(kw)) { + PyErr_BadInternalCall(); + return 0; + } + if (PyDict_Size(kw) == 0) + return 1; + + PyErr_Format(PyExc_TypeError, "%s does not take keyword arguments", + funcname); + return 0; +} + + + Index: getcopyright.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/getcopyright.c,v retrieving revision 1.16.2.2 retrieving revision 1.16.2.3 diff -u -d -r1.16.2.2 -r1.16.2.3 --- getcopyright.c 7 Jan 2005 07:04:39 -0000 1.16.2.2 +++ getcopyright.c 16 Oct 2005 05:24:05 -0000 1.16.2.3 @@ -4,7 +4,7 @@ static char cprt[] = "\ -Copyright (c) 2001-2004 Python Software Foundation.\n\ +Copyright (c) 2001-2005 Python Software Foundation.\n\ All Rights Reserved.\n\ \n\ Copyright (c) 2000 BeOpen.com.\n\ Index: graminit.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/graminit.c,v retrieving revision 2.33.2.3 retrieving revision 2.33.2.4 diff -u -d -r2.33.2.3 -r2.33.2.4 --- graminit.c 15 Apr 2005 02:18:23 -0000 2.33.2.3 +++ graminit.c 16 Oct 2005 05:24:05 -0000 2.33.2.4 @@ -279,10 +279,12 @@ {25, 3}, {0, 1}, }; -static arc arcs_13_2[1] = { +static arc arcs_13_2[2] = { + {43, 
4}, {9, 4}, }; -static arc arcs_13_3[1] = { +static arc arcs_13_3[2] = { + {43, 5}, {9, 5}, }; static arc arcs_13_4[1] = { @@ -295,13 +297,12 @@ static state states_13[6] = { {1, arcs_13_0}, {3, arcs_13_1}, - {1, arcs_13_2}, - {1, arcs_13_3}, + {2, arcs_13_2}, + {2, arcs_13_3}, {1, arcs_13_4}, {2, arcs_13_5}, }; static arc arcs_14_0[12] = { - {43, 1}, {44, 1}, {45, 1}, {46, 1}, @@ -313,6 +314,7 @@ {52, 1}, {53, 1}, {54, 1}, + {55, 1}, }; static arc arcs_14_1[1] = { {0, 1}, @@ -322,11 +324,11 @@ {1, arcs_14_1}, }; static arc arcs_15_0[1] = { - {55, 1}, + {56, 1}, }; static arc arcs_15_1[3] = { {26, 2}, - {56, 3}, + {57, 3}, {0, 1}, }; static arc arcs_15_2[2] = { @@ -367,10 +369,10 @@ {2, arcs_15_8}, }; static arc arcs_16_0[1] = { - {57, 1}, + {58, 1}, }; static arc arcs_16_1[1] = { - {58, 2}, + {59, 2}, }; static arc arcs_16_2[1] = { {0, 2}, @@ -381,7 +383,7 @@ {1, arcs_16_2}, }; static arc arcs_17_0[1] = { - {59, 1}, + {60, 1}, }; static arc arcs_17_1[1] = { {0, 1}, @@ -391,11 +393,11 @@ {1, arcs_17_1}, }; static arc arcs_18_0[5] = { - {60, 1}, {61, 1}, {62, 1}, {63, 1}, {64, 1}, + {65, 1}, }; static arc arcs_18_1[1] = { {0, 1}, @@ -405,7 +407,7 @@ {1, arcs_18_1}, }; static arc arcs_19_0[1] = { - {65, 1}, + {66, 1}, }; static arc arcs_19_1[1] = { {0, 1}, @@ -415,7 +417,7 @@ {1, arcs_19_1}, }; static arc arcs_20_0[1] = { - {66, 1}, + {67, 1}, }; static arc arcs_20_1[1] = { {0, 1}, @@ -425,7 +427,7 @@ {1, arcs_20_1}, }; static arc arcs_21_0[1] = { - {67, 1}, + {68, 1}, }; static arc arcs_21_1[2] = { {9, 2}, @@ -440,18 +442,14 @@ {1, arcs_21_2}, }; static arc arcs_22_0[1] = { - {68, 1}, + {43, 1}, }; static arc arcs_22_1[1] = { - {9, 2}, -}; -static arc arcs_22_2[1] = { - {0, 2}, + {0, 1}, }; -static state states_22[3] = { +static state states_22[2] = { {1, arcs_22_0}, {1, arcs_22_1}, - {1, arcs_22_2}, }; static arc arcs_23_0[1] = { {69, 1}, @@ -779,7 +777,7 @@ {93, 1}, }; static arc arcs_38_1[1] = { - {58, 2}, + {59, 2}, }; static arc arcs_38_2[1] = { {82, 3}, @@ 
-1034,7 +1032,7 @@ }; static arc arcs_50_1[3] = { {123, 0}, - {56, 0}, + {57, 0}, {0, 1}, }; static state states_50[2] = { @@ -1113,7 +1111,8 @@ {144, 5}, {145, 6}, }; -static arc arcs_55_1[2] = { +static arc arcs_55_1[3] = { + {43, 7}, {135, 7}, {15, 5}, }; @@ -1149,7 +1148,7 @@ }; static state states_55[11] = { {7, arcs_55_0}, - {2, arcs_55_1}, + {3, arcs_55_1}, {2, arcs_55_2}, {2, arcs_55_3}, {1, arcs_55_4}, @@ -1533,7 +1532,7 @@ {93, 1}, }; static arc arcs_71_1[1] = { - {58, 2}, + {59, 2}, }; static arc arcs_71_2[1] = { {82, 3}, @@ -1590,7 +1589,7 @@ {93, 1}, }; static arc arcs_74_1[1] = { - {58, 2}, + {59, 2}, }; static arc arcs_74_2[1] = { {82, 3}, @@ -1653,165 +1652,182 @@ {1, arcs_77_0}, {1, arcs_77_1}, }; -static dfa dfas[78] = { +static arc arcs_78_0[1] = { + {160, 1}, +}; +static arc arcs_78_1[2] = { + {9, 2}, + {0, 1}, +}; +static arc arcs_78_2[1] = { + {0, 2}, +}; +static state states_78[3] = { + {1, arcs_78_0}, + {2, arcs_78_1}, + {1, arcs_78_2}, +}; +static dfa dfas[79] = { {256, "single_input", 0, 3, states_0, - "\004\050\014\000\000\000\200\012\076\205\011\162\000\002\000\140\010\111\023\002"}, + "\004\050\014\000\000\000\000\025\074\205\011\162\000\002\000\140\010\111\023\002\001"}, {257, "file_input", 0, 2, states_1, - "\204\050\014\000\000\000\200\012\076\205\011\162\000\002\000\140\010\111\023\002"}, + "\204\050\014\000\000\000\000\025\074\205\011\162\000\002\000\140\010\111\023\002\001"}, {258, "eval_input", 0, 3, states_2, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {259, "decorator", 0, 7, states_3, - "\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {260, "decorators", 0, 2, states_4, - "\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + 
"\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {261, "funcdef", 0, 7, states_5, - "\000\010\004\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\010\004\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {262, "parameters", 0, 4, states_6, - "\000\040\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\040\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {263, "varargslist", 0, 10, states_7, - "\000\040\010\060\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\040\010\060\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {264, "fpdef", 0, 4, states_8, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {265, "fplist", 0, 3, states_9, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {266, "stmt", 0, 2, states_10, - "\000\050\014\000\000\000\200\012\076\205\011\162\000\002\000\140\010\111\023\002"}, + "\000\050\014\000\000\000\000\025\074\205\011\162\000\002\000\140\010\111\023\002\001"}, {267, "simple_stmt", 0, 4, states_11, - "\000\040\010\000\000\000\200\012\076\205\011\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\025\074\205\011\000\000\002\000\140\010\111\023\000\001"}, {268, "small_stmt", 0, 2, states_12, - "\000\040\010\000\000\000\200\012\076\205\011\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\025\074\205\011\000\000\002\000\140\010\111\023\000\001"}, {269, "expr_stmt", 0, 6, states_13, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + 
"\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {270, "augassign", 0, 2, states_14, - "\000\000\000\000\000\370\177\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\360\377\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {271, "print_stmt", 0, 9, states_15, - "\000\000\000\000\000\000\200\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {272, "del_stmt", 0, 3, states_16, - "\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {273, "pass_stmt", 0, 2, states_17, - "\000\000\000\000\000\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {274, "flow_stmt", 0, 2, states_18, - "\000\000\000\000\000\000\000\000\076\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\074\000\000\000\000\000\000\000\000\000\000\000\001"}, {275, "break_stmt", 0, 2, states_19, - "\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000\000\000\000\000\000"}, {276, "continue_stmt", 0, 2, states_20, - "\000\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000"}, {277, "return_stmt", 0, 3, states_21, - "\000\000\000\000\000\000\000\000\010\000\000\000\000\000\000\000\000\000\000\000"}, - {278, "yield_stmt", 0, 3, states_22, - "\000\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\000\000\000\000\000"}, + {278, "yield_stmt", 0, 2, states_22, + 
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\001"}, {279, "raise_stmt", 0, 7, states_23, - "\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000\000\000\000\000"}, {280, "import_stmt", 0, 2, states_24, - "\000\000\000\000\000\000\000\000\000\005\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\005\000\000\000\000\000\000\000\000\000\000\000"}, {281, "import_name", 0, 3, states_25, - "\000\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\000\000\000\000"}, {282, "import_from", 0, 7, states_26, - "\000\000\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\004\000\000\000\000\000\000\000\000\000\000\000"}, {283, "import_as_name", 0, 4, states_27, - "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {284, "dotted_as_name", 0, 4, states_28, - "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {285, "import_as_names", 0, 3, states_29, - "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {286, "dotted_as_names", 0, 2, states_30, - "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {287, "dotted_name", 0, 2, states_31, - "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + 
"\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {288, "global_stmt", 0, 3, states_32, - "\000\000\000\000\000\000\000\000\000\200\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\200\000\000\000\000\000\000\000\000\000\000\000"}, {289, "exec_stmt", 0, 7, states_33, - "\000\000\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\000\000\000"}, {290, "assert_stmt", 0, 5, states_34, - "\000\000\000\000\000\000\000\000\000\000\010\000\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\010\000\000\000\000\000\000\000\000\000\000"}, {291, "compound_stmt", 0, 2, states_35, - "\000\010\004\000\000\000\000\000\000\000\000\162\000\000\000\000\000\000\000\002"}, + "\000\010\004\000\000\000\000\000\000\000\000\162\000\000\000\000\000\000\000\002\000"}, {292, "if_stmt", 0, 8, states_36, - "\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000\000"}, {293, "while_stmt", 0, 8, states_37, - "\000\000\000\000\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\020\000\000\000\000\000\000\000\000\000"}, {294, "for_stmt", 0, 10, states_38, - "\000\000\000\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000\000"}, {295, "try_stmt", 0, 10, states_39, - "\000\000\000\000\000\000\000\000\000\000\000\100\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\100\000\000\000\000\000\000\000\000\000"}, {296, "except_clause", 0, 5, states_40, - "\000\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000"}, + 
"\000\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"}, {297, "suite", 0, 5, states_41, - "\004\040\010\000\000\000\200\012\076\205\011\000\000\002\000\140\010\111\023\000"}, + "\004\040\010\000\000\000\000\025\074\205\011\000\000\002\000\140\010\111\023\000\001"}, {298, "test", 0, 4, states_42, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {299, "and_test", 0, 2, states_43, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\003\000\000"}, {300, "not_test", 0, 3, states_44, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\003\000\000"}, {301, "comparison", 0, 2, states_45, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {302, "comp_op", 0, 4, states_46, - "\000\000\000\000\000\000\000\000\000\000\004\000\000\362\017\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\004\000\000\362\017\000\000\000\000\000\000"}, {303, "expr", 0, 2, states_47, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {304, "xor_expr", 0, 2, states_48, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {305, "and_expr", 0, 2, states_49, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, 
{306, "shift_expr", 0, 2, states_50, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {307, "arith_expr", 0, 2, states_51, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {308, "term", 0, 2, states_52, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {309, "factor", 0, 3, states_53, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {310, "power", 0, 4, states_54, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\111\003\000\000"}, {311, "atom", 0, 11, states_55, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\111\003\000\000"}, {312, "listmaker", 0, 5, states_56, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {313, "testlist_gexp", 0, 5, states_57, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {314, "lambdef", 0, 5, states_58, - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\020\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\020\000\000"}, {315, "trailer", 0, 7, states_59, - 
"\000\040\000\000\000\000\000\000\000\100\000\000\000\000\000\000\000\001\000\000"}, + "\000\040\000\000\000\000\000\000\000\100\000\000\000\000\000\000\000\001\000\000\000"}, {316, "subscriptlist", 0, 3, states_60, - "\000\040\050\000\000\000\000\000\000\100\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\050\000\000\000\000\000\000\100\000\000\000\002\000\140\010\111\023\000\000"}, {317, "subscript", 0, 7, states_61, - "\000\040\050\000\000\000\000\000\000\100\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\050\000\000\000\000\000\000\100\000\000\000\002\000\140\010\111\023\000\000"}, {318, "sliceop", 0, 3, states_62, - "\000\000\040\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\040\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {319, "exprlist", 0, 3, states_63, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\000\000\140\010\111\003\000\000"}, {320, "testlist", 0, 3, states_64, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {321, "testlist_safe", 0, 5, states_65, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {322, "dictmaker", 0, 5, states_66, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {323, "classdef", 0, 8, states_67, - "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\002"}, + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\002\000"}, {324, "arglist", 0, 8, states_68, - 
"\000\040\010\060\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\060\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {325, "argument", 0, 5, states_69, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {326, "list_iter", 0, 2, states_70, - "\000\000\000\000\000\000\000\000\000\000\000\042\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\042\000\000\000\000\000\000\000\000\000"}, {327, "list_for", 0, 6, states_71, - "\000\000\000\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000\000"}, {328, "list_if", 0, 4, states_72, - "\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000\000"}, {329, "gen_iter", 0, 2, states_73, - "\000\000\000\000\000\000\000\000\000\000\000\042\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\042\000\000\000\000\000\000\000\000\000"}, {330, "gen_for", 0, 6, states_74, - "\000\000\000\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\040\000\000\000\000\000\000\000\000\000"}, {331, "gen_if", 0, 4, states_75, - "\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"}, + "\000\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000\000"}, {332, "testlist1", 0, 2, states_76, - "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, + "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {333, "encoding_decl", 0, 2, states_77, - 
"\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + "\000\000\010\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, + {334, "yield_expr", 0, 3, states_78, + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\001"}, }; -static label labels[160] = { +static label labels[161] = { {0, "EMPTY"}, {256, 0}, {4, 0}, @@ -1855,6 +1871,7 @@ {289, 0}, {290, 0}, {270, 0}, + {334, 0}, {37, 0}, {38, 0}, {39, 0}, @@ -1880,7 +1897,6 @@ {1, "break"}, {1, "continue"}, {1, "return"}, - {1, "yield"}, {1, "raise"}, {281, 0}, {282, 0}, @@ -1972,10 +1988,11 @@ {329, 0}, {331, 0}, {333, 0}, + {1, "yield"}, }; grammar _PyParser_Grammar = { - 78, + 79, dfas, - {160, labels}, + {161, labels}, 256 }; Index: import.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/import.c,v retrieving revision 2.208.2.6 retrieving revision 2.208.2.7 diff -u -d -r2.208.2.6 -r2.208.2.7 --- import.c 14 Oct 2005 07:21:26 -0000 2.208.2.6 +++ import.c 16 Oct 2005 05:24:05 -0000 2.208.2.7 @@ -51,8 +51,9 @@ Python 2.4a0: 62041 Python 2.4a3: 62051 Python 2.4b1: 62061 + Python 2.5a0: 62071 */ -#define MAGIC (62061 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (62071 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the @@ -260,6 +261,18 @@ return 1; } +/* This function is called from PyOS_AfterFork to ensure that newly + created child processes do not share locks with the parent. 
*/ + +void +_PyImport_ReInitLock(void) +{ +#ifdef _AIX + if (import_lock != NULL) + import_lock = PyThread_allocate_lock(); +#endif +} + #else #define lock_import() @@ -856,8 +869,12 @@ PyObject *m; mtime = PyOS_GetLastModificationTime(pathname, fp); - if (mtime == (time_t)(-1)) + if (mtime == (time_t)(-1)) { + PyErr_Format(PyExc_RuntimeError, + "unable to get modification time from '%s'", + pathname); return NULL; + } #if SIZEOF_TIME_T > 4 /* Python's .pyc timestamp handling presumes that the timestamp fits in 4 bytes. This will be fine until sometime in the year 2038, @@ -1378,16 +1395,13 @@ /* First we may need a pile of platform-specific header files; the sequence * of #if's here should match the sequence in the body of case_ok(). */ -#if defined(MS_WINDOWS) || defined(__CYGWIN__) +#if defined(MS_WINDOWS) #include -#ifdef __CYGWIN__ -#include -#endif #elif defined(DJGPP) #include -#elif defined(__MACH__) && defined(__APPLE__) && defined(HAVE_DIRENT_H) +#elif (defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) && defined(HAVE_DIRENT_H) #include #include @@ -1408,23 +1422,15 @@ * match the sequence just above. 
*/ -/* MS_WINDOWS || __CYGWIN__ */ -#if defined(MS_WINDOWS) || defined(__CYGWIN__) +/* MS_WINDOWS */ +#if defined(MS_WINDOWS) WIN32_FIND_DATA data; HANDLE h; -#ifdef __CYGWIN__ - char tempbuf[MAX_PATH]; -#endif if (Py_GETENV("PYTHONCASEOK") != NULL) return 1; -#ifdef __CYGWIN__ - cygwin32_conv_to_win32_path(buf, tempbuf); - h = FindFirstFile(tempbuf, &data); -#else h = FindFirstFile(buf, &data); -#endif if (h == INVALID_HANDLE_VALUE) { PyErr_Format(PyExc_NameError, "Can't find file for module %.100s\n(filename %.300s)", @@ -1451,8 +1457,8 @@ } return strncmp(ffblk.ff_name, name, namelen) == 0; -/* new-fangled macintosh (macosx) */ -#elif defined(__MACH__) && defined(__APPLE__) && defined(HAVE_DIRENT_H) +/* new-fangled macintosh (macosx) or Cygwin */ +#elif (defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) && defined(HAVE_DIRENT_H) DIR *dirp; struct dirent *dp; char dirname[MAXPATHLEN + 1]; @@ -2299,13 +2305,14 @@ if (parentname == NULL) return NULL; parent = PyDict_GetItem(modules, parentname); - Py_DECREF(parentname); if (parent == NULL) { PyErr_Format(PyExc_ImportError, "reload(): parent %.200s not in sys.modules", - name); + PyString_AS_STRING(parentname)); + Py_DECREF(parentname); return NULL; } + Py_DECREF(parentname); subname++; path = PyObject_GetAttrString(parent, "__path__"); if (path == NULL) Index: marshal.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/marshal.c,v retrieving revision 1.72.2.3 retrieving revision 1.72.2.4 diff -u -d -r1.72.2.3 -r1.72.2.4 --- marshal.c 7 Jan 2005 07:04:56 -0000 1.72.2.3 +++ marshal.c 16 Oct 2005 05:24:05 -0000 1.72.2.4 @@ -16,26 +16,30 @@ */ #define MAX_MARSHAL_STACK_DEPTH 5000 -#define TYPE_NULL '0' -#define TYPE_NONE 'N' -#define TYPE_FALSE 'F' -#define TYPE_TRUE 'T' -#define TYPE_STOPITER 'S' -#define TYPE_ELLIPSIS '.' 
-#define TYPE_INT 'i' -#define TYPE_INT64 'I' -#define TYPE_FLOAT 'f' -#define TYPE_COMPLEX 'x' -#define TYPE_LONG 'l' -#define TYPE_STRING 's' -#define TYPE_INTERNED 't' -#define TYPE_STRINGREF 'R' -#define TYPE_TUPLE '(' -#define TYPE_LIST '[' -#define TYPE_DICT '{' -#define TYPE_CODE 'c' -#define TYPE_UNICODE 'u' -#define TYPE_UNKNOWN '?' +#define TYPE_NULL '0' +#define TYPE_NONE 'N' +#define TYPE_FALSE 'F' +#define TYPE_TRUE 'T' +#define TYPE_STOPITER 'S' +#define TYPE_ELLIPSIS '.' +#define TYPE_INT 'i' +#define TYPE_INT64 'I' +#define TYPE_FLOAT 'f' +#define TYPE_BINARY_FLOAT 'g' +#define TYPE_COMPLEX 'x' +#define TYPE_BINARY_COMPLEX 'y' +#define TYPE_LONG 'l' +#define TYPE_STRING 's' +#define TYPE_INTERNED 't' +#define TYPE_STRINGREF 'R' +#define TYPE_TUPLE '(' +#define TYPE_LIST '[' +#define TYPE_DICT '{' +#define TYPE_CODE 'c' +#define TYPE_UNICODE 'u' +#define TYPE_UNKNOWN '?' +#define TYPE_SET '<' +#define TYPE_FROZENSET '>' typedef struct { FILE *fp; @@ -46,6 +50,7 @@ char *ptr; char *end; PyObject *strings; /* dict on marshal, list on unmarshal */ + int version; } WFILE; #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \ @@ -164,32 +169,62 @@ w_short(ob->ob_digit[i], p); } else if (PyFloat_Check(v)) { - char buf[256]; /* Plenty to format any double */ - PyFloat_AsReprString(buf, (PyFloatObject *)v); - n = strlen(buf); - w_byte(TYPE_FLOAT, p); - w_byte(n, p); - w_string(buf, n, p); + if (p->version > 1) { + unsigned char buf[8]; + if (_PyFloat_Pack8(PyFloat_AsDouble(v), + buf, 1) < 0) { + p->error = 1; + return; + } + w_byte(TYPE_BINARY_FLOAT, p); + w_string((char*)buf, 8, p); + } + else { + char buf[256]; /* Plenty to format any double */ + PyFloat_AsReprString(buf, (PyFloatObject *)v); + n = strlen(buf); + w_byte(TYPE_FLOAT, p); + w_byte(n, p); + w_string(buf, n, p); + } } #ifndef WITHOUT_COMPLEX else if (PyComplex_Check(v)) { - char buf[256]; /* Plenty to format any double */ - PyFloatObject *temp; - w_byte(TYPE_COMPLEX, p); - temp = 
(PyFloatObject*)PyFloat_FromDouble( - PyComplex_RealAsDouble(v)); - PyFloat_AsReprString(buf, temp); - Py_DECREF(temp); - n = strlen(buf); - w_byte(n, p); - w_string(buf, n, p); - temp = (PyFloatObject*)PyFloat_FromDouble( - PyComplex_ImagAsDouble(v)); - PyFloat_AsReprString(buf, temp); - Py_DECREF(temp); - n = strlen(buf); - w_byte(n, p); - w_string(buf, n, p); + if (p->version > 1) { + unsigned char buf[8]; + if (_PyFloat_Pack8(PyComplex_RealAsDouble(v), + buf, 1) < 0) { + p->error = 1; + return; + } + w_byte(TYPE_BINARY_COMPLEX, p); + w_string((char*)buf, 8, p); + if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v), + buf, 1) < 0) { + p->error = 1; + return; + } + w_string((char*)buf, 8, p); + } + else { + char buf[256]; /* Plenty to format any double */ + PyFloatObject *temp; + w_byte(TYPE_COMPLEX, p); + temp = (PyFloatObject*)PyFloat_FromDouble( + PyComplex_RealAsDouble(v)); + PyFloat_AsReprString(buf, temp); + Py_DECREF(temp); + n = strlen(buf); + w_byte(n, p); + w_string(buf, n, p); + temp = (PyFloatObject*)PyFloat_FromDouble( + PyComplex_ImagAsDouble(v)); + PyFloat_AsReprString(buf, temp); + Py_DECREF(temp); + n = strlen(buf); + w_byte(n, p); + w_string(buf, n, p); + } } #endif else if (PyString_Check(v)) { @@ -259,6 +294,37 @@ } w_object((PyObject *)NULL, p); } + else if (PyAnySet_Check(v)) { + PyObject *value, *it; + + if (PyObject_TypeCheck(v, &PySet_Type)) + w_byte(TYPE_SET, p); + else + w_byte(TYPE_FROZENSET, p); + n = PyObject_Size(v); + if (n == -1) { + p->depth--; + p->error = 1; + return; + } + w_long((long)n, p); + it = PyObject_GetIter(v); + if (it == NULL) { + p->depth--; + p->error = 1; + return; + } + while ((value = PyIter_Next(it)) != NULL) { + w_object(value, p); + Py_DECREF(value); + } + Py_DECREF(it); + if (PyErr_Occurred()) { + p->depth--; + p->error = 1; + return; + } + } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v; w_byte(TYPE_CODE, p); @@ -303,6 +369,7 @@ wf.error = 0; wf.depth = 0; wf.strings = NULL; + wf.version = 
version; w_long(x, &wf); } @@ -314,6 +381,7 @@ wf.error = 0; wf.depth = 0; wf.strings = (version > 0) ? PyDict_New() : NULL; + wf.version = version; w_object(x, &wf); Py_XDECREF(wf.strings); } @@ -407,7 +475,7 @@ { /* NULL is a valid return value, it does not necessarily means that an exception is set. */ - PyObject *v, *v2; + PyObject *v, *v2, *v3; long i, n; int type = r_byte(p); @@ -487,6 +555,22 @@ return PyFloat_FromDouble(dx); } + case TYPE_BINARY_FLOAT: + { + unsigned char buf[8]; + double x; + if (r_string((char*)buf, 8, p) != 8) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + return NULL; + } + x = _PyFloat_Unpack8(buf, 1); + if (x == -1.0 && PyErr_Occurred()) { + return NULL; + } + return PyFloat_FromDouble(x); + } + #ifndef WITHOUT_COMPLEX case TYPE_COMPLEX: { @@ -514,6 +598,31 @@ PyFPE_END_PROTECT(c) return PyComplex_FromCComplex(c); } + + case TYPE_BINARY_COMPLEX: + { + unsigned char buf[8]; + Py_complex c; + if (r_string((char*)buf, 8, p) != 8) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + return NULL; + } + c.real = _PyFloat_Unpack8(buf, 1); + if (c.real == -1.0 && PyErr_Occurred()) { + return NULL; + } + if (r_string((char*)buf, 8, p) != 8) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + return NULL; + } + c.imag = _PyFloat_Unpack8(buf, 1); + if (c.imag == -1.0 && PyErr_Occurred()) { + return NULL; + } + return PyComplex_FromCComplex(c); + } #endif case TYPE_INTERNED: @@ -524,13 +633,13 @@ return NULL; } v = PyString_FromStringAndSize((char *)NULL, n); - if (v != NULL) { - if (r_string(PyString_AS_STRING(v), (int)n, p) != n) { - Py_DECREF(v); - v = NULL; - PyErr_SetString(PyExc_EOFError, + if (v == NULL) + return v; + if (r_string(PyString_AS_STRING(v), (int)n, p) != n) { + Py_DECREF(v); + PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); - } + return NULL; } if (type == TYPE_INTERNED) { PyString_InternInPlace(&v); @@ -540,6 +649,10 @@ case 
TYPE_STRINGREF: n = r_long(p); + if (n < 0 || n >= PyList_GET_SIZE(p->strings)) { + PyErr_SetString(PyExc_ValueError, "bad marshal data"); + return NULL; + } v = PyList_GET_ITEM(p->strings, n); Py_INCREF(v); return v; @@ -636,6 +749,37 @@ } return v; + case TYPE_SET: + case TYPE_FROZENSET: + n = r_long(p); + if (n < 0) { + PyErr_SetString(PyExc_ValueError, "bad marshal data"); + return NULL; + } + v = PyTuple_New((int)n); + if (v == NULL) + return v; + for (i = 0; i < n; i++) { + v2 = r_object(p); + if ( v2 == NULL ) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_TypeError, + "NULL object in marshal data"); + Py_DECREF(v); + v = NULL; + break; + } + PyTuple_SET_ITEM(v, (int)i, v2); + } + if (v == NULL) + return v; + if (type == TYPE_SET) + v3 = PySet_New(v); + else + v3 = PyFrozenSet_New(v); + Py_DECREF(v); + return v3; + case TYPE_CODE: if (PyEval_GetRestricted()) { PyErr_SetString(PyExc_RuntimeError, @@ -644,30 +788,63 @@ return NULL; } else { - int argcount = r_long(p); - int nlocals = r_long(p); - int stacksize = r_long(p); - int flags = r_long(p); - PyObject *code = r_object(p); - PyObject *consts = r_object(p); - PyObject *names = r_object(p); - PyObject *varnames = r_object(p); - PyObject *freevars = r_object(p); - PyObject *cellvars = r_object(p); - PyObject *filename = r_object(p); - PyObject *name = r_object(p); - int firstlineno = r_long(p); - PyObject *lnotab = r_object(p); + int argcount; + int nlocals; + int stacksize; + int flags; + PyObject *code = NULL; + PyObject *consts = NULL; + PyObject *names = NULL; + PyObject *varnames = NULL; + PyObject *freevars = NULL; + PyObject *cellvars = NULL; + PyObject *filename = NULL; + PyObject *name = NULL; + int firstlineno; + PyObject *lnotab = NULL; + + v = NULL; - if (!PyErr_Occurred()) { - v = (PyObject *) PyCode_New( + argcount = r_long(p); + nlocals = r_long(p); + stacksize = r_long(p); + flags = r_long(p); + code = r_object(p); + if (code == NULL) + goto code_error; + consts = r_object(p); + if 
(consts == NULL) + goto code_error; + names = r_object(p); + if (names == NULL) + goto code_error; + varnames = r_object(p); + if (varnames == NULL) + goto code_error; + freevars = r_object(p); + if (freevars == NULL) + goto code_error; + cellvars = r_object(p); + if (cellvars == NULL) + goto code_error; + filename = r_object(p); + if (filename == NULL) + goto code_error; + name = r_object(p); + if (name == NULL) + goto code_error; + firstlineno = r_long(p); + lnotab = r_object(p); + if (lnotab == NULL) + goto code_error; + + v = (PyObject *) PyCode_New( argcount, nlocals, stacksize, flags, code, consts, names, varnames, freevars, cellvars, filename, name, firstlineno, lnotab); - } - else - v = NULL; + + code_error: Py_XDECREF(code); Py_XDECREF(consts); Py_XDECREF(names); @@ -819,6 +996,7 @@ wf.end = wf.ptr + PyString_Size(wf.str); wf.error = 0; wf.depth = 0; + wf.version = version; wf.strings = (version > 0) ? PyDict_New() : NULL; w_object(x, &wf); Py_XDECREF(wf.strings); @@ -906,7 +1084,7 @@ char *s; int n; PyObject* result; - if (!PyArg_ParseTuple(args, "s#|i:loads", &s, &n)) + if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) return NULL; rf.fp = NULL; rf.ptr = s; Index: pystate.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/pystate.c,v retrieving revision 2.20.18.2 retrieving revision 2.20.18.3 diff -u -d -r2.20.18.2 -r2.20.18.3 --- pystate.c 7 Jan 2005 07:04:57 -0000 2.20.18.2 +++ pystate.c 16 Oct 2005 05:24:05 -0000 2.20.18.3 @@ -36,6 +36,12 @@ #define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock())) #define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK) #define HEAD_UNLOCK() PyThread_release_lock(head_mutex) + +/* The single PyInterpreterState used by this process' + GILState implementation +*/ +static PyInterpreterState *autoInterpreterState = NULL; +static int autoTLSkey = 0; #else #define HEAD_INIT() /* Nothing */ #define HEAD_LOCK() /* Nothing */ @@ 
-47,6 +53,10 @@ PyThreadState *_PyThreadState_Current = NULL; PyThreadFrameGetter _PyThreadState_GetFrame = NULL; +#ifdef WITH_THREAD +static void _PyGILState_NoteThreadState(PyThreadState* tstate); +#endif + PyInterpreterState * PyInterpreterState_New(void) @@ -180,6 +190,10 @@ tstate->c_profileobj = NULL; tstate->c_traceobj = NULL; +#ifdef WITH_THREAD + _PyGILState_NoteThreadState(tstate); +#endif + HEAD_LOCK(); tstate->next = interp->tstate_head; interp->tstate_head = tstate; @@ -261,6 +275,8 @@ "PyThreadState_DeleteCurrent: no current tstate"); _PyThreadState_Current = NULL; tstate_delete_common(tstate); + if (autoTLSkey && PyThread_get_key_value(autoTLSkey) == tstate) + PyThread_delete_key_value(autoTLSkey); PyEval_ReleaseLock(); } #endif /* WITH_THREAD */ @@ -289,7 +305,7 @@ #if defined(Py_DEBUG) && defined(WITH_THREAD) if (new) { PyThreadState *check = PyGILState_GetThisThreadState(); - if (check && check != new) + if (check && check->interp == new->interp && check != new) Py_FatalError("Invalid thread state for this thread"); } #endif @@ -320,7 +336,7 @@ /* Asynchronously raise an exception in a thread. Requested by Just van Rossum and Alex Martelli. - To prevent naive misuse, you must write your own exception + To prevent naive misuse, you must write your own extension to call this. Must be called with the GIL held. 
Returns the number of tstates modified; if it returns a number greater than one, you're in trouble, and you should call it again @@ -332,6 +348,7 @@ PyInterpreterState *interp = tstate->interp; PyThreadState *p; int count = 0; + HEAD_LOCK(); for (p = interp->tstate_head; p != NULL; p = p->next) { if (p->thread_id != id) continue; @@ -340,6 +357,7 @@ p->async_exc = exc; count += 1; } + HEAD_UNLOCK(); return count; } @@ -391,12 +409,6 @@ return tstate == _PyThreadState_Current; } -/* The single PyInterpreterState used by this process' - GILState implementation -*/ -static PyInterpreterState *autoInterpreterState = NULL; -static int autoTLSkey = 0; - /* Internal initialization/finalization functions called by Py_Initialize/Py_Finalize */ @@ -406,12 +418,10 @@ assert(i && t); /* must init with valid states */ autoTLSkey = PyThread_create_key(); autoInterpreterState = i; - /* Now stash the thread state for this thread in TLS */ assert(PyThread_get_key_value(autoTLSkey) == NULL); - if (PyThread_set_key_value(autoTLSkey, (void *)t) < 0) - Py_FatalError("Couldn't create autoTLSkey mapping"); - assert(t->gilstate_counter == 0); /* must be a new thread state */ - t->gilstate_counter = 1; + assert(t->gilstate_counter == 0); + + _PyGILState_NoteThreadState(t); } void @@ -422,6 +432,41 @@ autoInterpreterState = NULL;; } +/* When a thread state is created for a thread by some mechanism other than + PyGILState_Ensure, it's important that the GILState machinery knows about + it so it doesn't try to create another thread state for the thread (this is + a better fix for SF bug #1010677 than the first one attempted). +*/ +void +_PyGILState_NoteThreadState(PyThreadState* tstate) +{ + /* If autoTLSkey is 0, this must be the very first threadstate created + in Py_Initialize(). Don't do anything for now (we'll be back here + when _PyGILState_Init is called). */ + if (!autoTLSkey) + return; + + /* Stick the thread state for this thread in thread local storage. 
+ + The only situation where you can legitimately have more than one + thread state for an OS level thread is when there are multiple + interpreters, when: + + a) You shouldn't really be using the PyGILState_ APIs anyway, + and: + + b) The slightly odd way PyThread_set_key_value works (see + comments by its implementation) means that the first thread + state created for that given OS level thread will "win", + which seems reasonable behaviour. + */ + if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) + Py_FatalError("Couldn't create autoTLSkey mapping"); + + /* PyGILState_Release must not try to delete this thread state. */ + tstate->gilstate_counter = 1; +} + /* The public functions */ PyThreadState * PyGILState_GetThisThreadState(void) @@ -448,8 +493,9 @@ tcur = PyThreadState_New(autoInterpreterState); if (tcur == NULL) Py_FatalError("Couldn't create thread-state for new thread"); - if (PyThread_set_key_value(autoTLSkey, (void *)tcur) < 0) - Py_FatalError("Couldn't create autoTLSkey mapping"); + /* This is our thread state! We'll need to delete it in the + matching call to PyGILState_Release(). */ + tcur->gilstate_counter = 0; current = 0; /* new thread state is never current */ } else @@ -496,11 +542,9 @@ * habit of coming back). */ PyThreadState_DeleteCurrent(); - /* Delete this thread from our TLS. 
*/ - PyThread_delete_key_value(autoTLSkey); } /* Release the lock if necessary */ else if (oldstate == PyGILState_UNLOCKED) - PyEval_ReleaseThread(tcur); + PyEval_SaveThread(); } #endif /* WITH_THREAD */ Index: pythonrun.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v retrieving revision 2.161.2.18 retrieving revision 2.161.2.19 diff -u -d -r2.161.2.18 -r2.161.2.19 --- pythonrun.c 14 Oct 2005 07:21:27 -0000 2.161.2.18 +++ pythonrun.c 16 Oct 2005 05:24:05 -0000 2.161.2.19 @@ -175,6 +175,8 @@ if (!_PyInt_Init()) Py_FatalError("Py_Initialize: can't init ints"); + _PyFloat_Init(); + interp->modules = PyDict_New(); if (interp->modules == NULL) Py_FatalError("Py_Initialize: can't make modules dictionary"); @@ -395,13 +397,6 @@ _Py_PrintReferences(stderr); #endif /* Py_TRACE_REFS */ - /* Now we decref the exception classes. After this point nothing - can raise an exception. That's okay, because each Fini() method - below has been checked to make sure no exceptions are ever - raised. - */ - _PyExc_Fini(); - /* Cleanup auto-thread-state */ #ifdef WITH_THREAD _PyGILState_Fini(); @@ -410,6 +405,14 @@ /* Clear interpreter state */ PyInterpreterState_Clear(interp); + /* Now we decref the exception classes. After this point nothing + can raise an exception. That's okay, because each Fini() method + below has been checked to make sure no exceptions are ever + raised. 
+ */ + + _PyExc_Fini(); + /* Delete current thread */ PyThreadState_Swap(NULL); PyInterpreterState_Delete(interp); @@ -420,6 +423,7 @@ PyCFunction_Fini(); PyTuple_Fini(); PyList_Fini(); + PySet_Fini(); PyString_Fini(); PyInt_Fini(); PyFloat_Fini(); @@ -1417,20 +1421,25 @@ errtype = PyExc_IndentationError; msg = "too many levels of indentation"; break; - case E_DECODE: { /* XXX */ - PyThreadState* tstate = PyThreadState_GET(); - PyObject* value = tstate->curexc_value; + case E_DECODE: { + PyObject *type, *value, *tb; + PyErr_Fetch(&type, &value, &tb); if (value != NULL) { - u = PyObject_Repr(value); + u = PyObject_Str(value); if (u != NULL) { msg = PyString_AsString(u); - break; } } if (msg == NULL) msg = "unknown decode error"; + Py_DECREF(type); + Py_DECREF(value); + Py_XDECREF(tb); break; } + case E_LINECONT: + msg = "unexpected character after line continuation character"; + break; default: fprintf(stderr, "error=%d\n", err->error); msg = "unknown parsing error"; Index: structmember.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/structmember.c,v retrieving revision 2.23.8.1 retrieving revision 2.23.8.2 diff -u -d -r2.23.8.1 -r2.23.8.2 --- structmember.c 7 Jan 2005 07:04:58 -0000 2.23.8.1 +++ structmember.c 16 Oct 2005 05:24:06 -0000 2.23.8.2 @@ -118,6 +118,14 @@ PyErr_SetString(PyExc_AttributeError, l->name); Py_XINCREF(v); break; +#ifdef HAVE_LONG_LONG + case T_LONGLONG: + v = PyLong_FromLongLong(*(PY_LONG_LONG *)addr); + break; + case T_ULONGLONG: + v = PyLong_FromUnsignedLongLong(*(unsigned PY_LONG_LONG *)addr); + break; +#endif /* HAVE_LONG_LONG */ default: PyErr_SetString(PyExc_SystemError, "bad memberdescr type"); v = NULL; @@ -246,6 +254,30 @@ return -1; } break; +#ifdef HAVE_LONG_LONG + case T_LONGLONG: + if (!PyLong_Check(v)) { + PyErr_BadArgument(); + return -1; + } else { + *(PY_LONG_LONG*)addr = PyLong_AsLongLong(v); + if ((*addr == -1) && PyErr_Occurred()) { + return -1; + } + } 
+ break; + case T_ULONGLONG: + if (!PyLong_Check(v)) { + PyErr_BadArgument(); + return -1; + } else { + *(unsigned PY_LONG_LONG*)addr = PyLong_AsUnsignedLongLong(v); + if ((*addr == -1) && PyErr_Occurred()) { + return -1; + } + } + break; +#endif /* HAVE_LONG_LONG */ default: PyErr_Format(PyExc_SystemError, "bad memberdescr type for %s", l->name); Index: sysmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/sysmodule.c,v retrieving revision 2.107.2.3 retrieving revision 2.107.2.4 diff -u -d -r2.107.2.3 -r2.107.2.4 --- sysmodule.c 7 Jan 2005 07:04:59 -0000 2.107.2.3 +++ sysmodule.c 16 Oct 2005 05:24:06 -0000 2.107.2.4 @@ -927,6 +927,13 @@ ) /* end of sys_doc */ ; +static int +_check_and_flush (FILE *stream) +{ + int prev_fail = ferror (stream); + return fflush (stream) || prev_fail ? EOF : 0; +} + PyObject * _PySys_Init(void) { @@ -940,9 +947,27 @@ m = Py_InitModule3("sys", sys_methods, sys_doc); sysdict = PyModule_GetDict(m); + { + /* XXX: does this work on Win/Win64? (see posix_fstat) */ + struct stat sb; + if (fstat(fileno(stdin), &sb) == 0 && + S_ISDIR(sb.st_mode)) { + Py_FatalError("<stdin> is a directory"); + } + } + + /* Closing the standard FILE* if sys.std* goes aways causes problems + * for embedded Python usages. Closing them when somebody explicitly + * invokes .close() might be possible, but the FAQ promises they get + * never closed. However, we still need to get write errors when + * writing fails (e.g. because stdout is redirected), so we flush the + * streams and check for errors before the file objects are deleted. + * On OS X, fflush()ing stdin causes an error, so we exempt stdin + * from that procedure. 
+ */ sysin = PyFile_FromFile(stdin, "<stdin>", "r", NULL); - sysout = PyFile_FromFile(stdout, "<stdout>", "w", NULL); - syserr = PyFile_FromFile(stderr, "<stderr>", "w", NULL); + sysout = PyFile_FromFile(stdout, "<stdout>", "w", _check_and_flush); + syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush); if (PyErr_Occurred()) return NULL; #ifdef MS_WINDOWS @@ -1172,7 +1197,7 @@ char link[MAXPATHLEN+1]; char argv0copy[2*MAXPATHLEN+1]; int nr = 0; - if (argc > 0 && argv0 != NULL) + if (argc > 0 && argv0 != NULL && strcmp(argv0, "-c") != 0) nr = readlink(argv0, link, MAXPATHLEN); if (nr > 0) { /* It's a symlink */ @@ -1197,7 +1222,7 @@ } #endif /* HAVE_READLINK */ #if SEP == '\\' /* Special case for MS filename syntax */ - if (argc > 0 && argv0 != NULL) { + if (argc > 0 && argv0 != NULL && strcmp(argv0, "-c") != 0) { char *q; #ifdef MS_WINDOWS char *ptemp; @@ -1220,7 +1245,7 @@ } } #else /* All other filename syntaxes */ - if (argc > 0 && argv0 != NULL) { + if (argc > 0 && argv0 != NULL && strcmp(argv0, "-c") != 0) { #if defined(HAVE_REALPATH) if (realpath(argv0, fullpath)) { argv0 = fullpath; Index: thread.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/thread.c,v retrieving revision 2.44.2.2 retrieving revision 2.44.2.3 diff -u -d -r2.44.2.2 -r2.44.2.3 --- thread.c 7 Jan 2005 07:04:59 -0000 2.44.2.2 +++ thread.c 16 Oct 2005 05:24:06 -0000 2.44.2.3 @@ -45,6 +45,20 @@ #define SUN_LWP #endif +/* Check if we're running on HP-UX and _SC_THREADS is defined. If so, then + enough of the Posix threads package is implimented to support python + threads. + + This is valid for HP-UX 11.23 running on an ia64 system. If needed, add + a check of __ia64 to verify that we're running on a ia64 system instead + of a pa-risc system. 
+*/ +#ifdef __hpux +#ifdef _SC_THREADS +#define _POSIX_THREADS +#endif +#endif + #endif /* _POSIX_THREADS */ Index: thread_nt.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/thread_nt.h,v retrieving revision 2.22.2.1 retrieving revision 2.22.2.2 diff -u -d -r2.22.2.1 -r2.22.2.2 --- thread_nt.h 7 Jan 2005 07:05:00 -0000 2.22.2.1 +++ thread_nt.h 16 Oct 2005 05:24:06 -0000 2.22.2.2 @@ -299,7 +299,7 @@ dprintf(("%ld: PyThread_acquire_lock(%p, %d) called\n", PyThread_get_thread_ident(),aLock, waitflag)); - success = aLock && EnterNonRecursiveMutex((PNRMUTEX) aLock, (waitflag == 1 ? INFINITE : 0)) == WAIT_OBJECT_0 ; + success = aLock && EnterNonRecursiveMutex((PNRMUTEX) aLock, (waitflag ? INFINITE : 0)) == WAIT_OBJECT_0 ; dprintf(("%ld: PyThread_acquire_lock(%p, %d) -> %d\n", PyThread_get_thread_ident(),aLock, waitflag, success)); Index: thread_os2.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/thread_os2.h,v retrieving revision 2.14.2.1 retrieving revision 2.14.2.2 diff -u -d -r2.14.2.1 -r2.14.2.2 --- thread_os2.h 28 Apr 2003 17:16:44 -0000 2.14.2.1 +++ thread_os2.h 16 Oct 2005 05:24:06 -0000 2.14.2.2 @@ -14,6 +14,10 @@ long PyThread_get_thread_ident(void); #endif +#if !defined(THREAD_STACK_SIZE) +#define THREAD_STACK_SIZE 0x10000 +#endif + /* * Initialization of the C package, should not be needed. 
*/ @@ -31,7 +35,7 @@ int aThread; int success = 0; - aThread = _beginthread(func,NULL,65536,arg); + aThread = _beginthread(func, NULL, THREAD_STACK_SIZE, arg); if (aThread == -1) { success = -1; Index: thread_pthread.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/thread_pthread.h,v retrieving revision 2.40.2.2 retrieving revision 2.40.2.3 diff -u -d -r2.40.2.2 -r2.40.2.3 --- thread_pthread.h 7 Jan 2005 07:05:00 -0000 2.40.2.2 +++ thread_pthread.h 16 Oct 2005 05:24:06 -0000 2.40.2.3 @@ -16,9 +16,15 @@ family of functions must indicate this by defining _POSIX_SEMAPHORES. */ #ifdef _POSIX_SEMAPHORES +/* On FreeBSD 4.x, _POSIX_SEMAPHORES is defined empty, so + we need to add 0 to make it work there as well. */ +#if (_POSIX_SEMAPHORES+0) == -1 +#define HAVE_BROKEN_POSIX_SEMAPHORES +#else #include <semaphore.h> #include <errno.h> #endif +#endif #if !defined(pthread_attr_default) # define pthread_attr_default ((pthread_attr_t *)NULL) @@ -349,8 +355,8 @@ PyThread_init_thread(); lock = (pthread_lock *) malloc(sizeof(pthread_lock)); - memset((void *)lock, '\0', sizeof(pthread_lock)); if (lock) { + memset((void *)lock, '\0', sizeof(pthread_lock)); lock->locked = 0; status = pthread_mutex_init(&lock->mut, Index: thread_wince.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/thread_wince.h,v retrieving revision 2.7 retrieving revision 2.7.12.1 diff -u -d -r2.7 -r2.7.12.1 --- thread_wince.h 16 Oct 2001 21:13:49 -0000 2.7 +++ thread_wince.h 16 Oct 2005 05:24:06 -0000 2.7.12.1 @@ -140,13 +140,13 @@ dprintf(("%ld: PyThread_acquire_lock(%p, %d) called\n", PyThread_get_thread_ident(),aLock, waitflag)); #ifndef DEBUG - waitResult = WaitForSingleObject(aLock, (waitflag == 1 ? INFINITE : 0)); + waitResult = WaitForSingleObject(aLock, (waitflag ? INFINITE : 0)); #else /* To aid in debugging, we regularly wake up. 
This allows us to break into the debugger */ while (TRUE) { waitResult = WaitForSingleObject(aLock, waitflag ? 3000 : 0); - if (waitflag==0 || (waitflag==1 && waitResult == WAIT_OBJECT_0)) + if (waitflag==0 || (waitflag && waitResult == WAIT_OBJECT_0)) break; } #endif From jhylton at users.sourceforge.net Sun Oct 16 07:24:29 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:29 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Demo/embed Makefile, 1.11.8.1, 1.11.8.2 Message-ID: <20051016052429.7D6BF1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Demo/embed In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Demo/embed Modified Files: Tag: ast-branch Makefile Log Message: Merge head to branch (for the last time) Index: Makefile =================================================================== RCS file: /cvsroot/python/python/dist/src/Demo/embed/Makefile,v retrieving revision 1.11.8.1 retrieving revision 1.11.8.2 diff -u -d -r1.11.8.1 -r1.11.8.2 --- Makefile 28 Apr 2003 17:39:04 -0000 1.11.8.1 +++ Makefile 16 Oct 2005 05:23:56 -0000 1.11.8.2 @@ -10,7 +10,7 @@ srcdir= ../.. 
# Python version -VERSION= 2.3 +VERSION= 2.5 # Compiler flags OPT= -g From jhylton at users.sourceforge.net Sun Oct 16 07:24:29 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:29 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Demo/parser README,1.4,1.4.24.1 Message-ID: <20051016052429.9B3A01E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Demo/parser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Demo/parser Modified Files: Tag: ast-branch README Log Message: Merge head to branch (for the last time) Index: README =================================================================== RCS file: /cvsroot/python/python/dist/src/Demo/parser/README,v retrieving revision 1.4 retrieving revision 1.4.24.1 diff -u -d -r1.4 -r1.4.24.1 --- README 23 Oct 2000 20:50:23 -0000 1.4 +++ README 16 Oct 2005 05:23:56 -0000 1.4.24.1 @@ -20,6 +20,6 @@ handle nested constructs easily using the functions and classes in example.py. - test_parser.py program to put the parser module through it's paces. + test_parser.py program to put the parser module through its paces. Enjoy! 
From jhylton at users.sourceforge.net Sun Oct 16 07:24:29 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:29 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Demo/classes Complex.py, 1.5.30.2, 1.5.30.3 Message-ID: <20051016052429.A351B1E4009@bag.python.org> Update of /cvsroot/python/python/dist/src/Demo/classes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Demo/classes Modified Files: Tag: ast-branch Complex.py Log Message: Merge head to branch (for the last time) Index: Complex.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Demo/classes/Complex.py,v retrieving revision 1.5.30.2 retrieving revision 1.5.30.3 diff -u -d -r1.5.30.2 -r1.5.30.3 --- Complex.py 7 Jan 2005 06:56:44 -0000 1.5.30.2 +++ Complex.py 16 Oct 2005 05:23:56 -0000 1.5.30.3 @@ -54,7 +54,7 @@ # nor are shift and mask operations. # # The standard module math does not support complex numbers. -# (I suppose it would be easy to implement a cmath module.) +# The cmath modules should be used instead. 
# # Idea: # add a class Polar(r, phi) and mixed-mode arithmetic which @@ -62,8 +62,8 @@ # Complex for +,-,cmp # Polar for *,/,pow - -import types, math +import math +import sys twopi = math.pi*2.0 halfpi = math.pi/2.0 @@ -74,8 +74,8 @@ def ToComplex(obj): if IsComplex(obj): return obj - elif type(obj) == types.TupleType: - return apply(Complex, obj) + elif isinstance(obj, tuple): + return Complex(*obj) else: return Complex(obj) @@ -86,34 +86,40 @@ def Re(obj): if IsComplex(obj): return obj.re - else: - return obj + return obj def Im(obj): if IsComplex(obj): return obj.im - else: - return obj + return 0 class Complex: def __init__(self, re=0, im=0): + _re = 0 + _im = 0 if IsComplex(re): - im = i + Complex(0, re.im) - re = re.re + _re = re.re + _im = re.im + else: + _re = re if IsComplex(im): - re = re - im.im - im = im.re - self.__dict__['re'] = re - self.__dict__['im'] = im + _re = _re - im.im + _im = _im + im.re + else: + _im = _im + im + # this class is immutable, so setting self.re directly is + # not possible. + self.__dict__['re'] = _re + self.__dict__['im'] = _im def __setattr__(self, name, value): raise TypeError, 'Complex numbers are immutable' def __hash__(self): - if not self.im: return hash(self.re) - mod = sys.maxint + 1L - return int((hash(self.re) + 2L*hash(self.im) + mod) % (2L*mod) - mod) + if not self.im: + return hash(self.re) + return hash((self.re, self.im)) def __repr__(self): if not self.im: @@ -134,8 +140,7 @@ return self def __abs__(self): - # XXX could be done differently to avoid overflow! 
- return math.sqrt(self.re*self.re + self.im*self.im) + return math.hypot(self.re, self.im) def __int__(self): if self.im: @@ -224,22 +229,41 @@ def checkop(expr, a, b, value, fuzz = 1e-6): - import sys print ' ', a, 'and', b, try: result = eval(expr) except: result = sys.exc_type print '->', result - if (type(result) == type('') or type(value) == type('')): - ok = result == value + if isinstance(result, str) or isinstance(value, str): + ok = (result == value) else: ok = abs(result - value) <= fuzz if not ok: print '!!\t!!\t!! should be', value, 'diff', abs(result - value) - def test(): + print 'test constructors' + constructor_test = ( + # "expect" is an array [re,im] "got" the Complex. + ( (0,0), Complex() ), + ( (0,0), Complex() ), + ( (1,0), Complex(1) ), + ( (0,1), Complex(0,1) ), + ( (1,2), Complex(Complex(1,2)) ), + ( (1,3), Complex(Complex(1,2),1) ), + ( (0,0), Complex(0,Complex(0,0)) ), + ( (3,4), Complex(3,Complex(4)) ), + ( (-1,3), Complex(1,Complex(3,2)) ), + ( (-7,6), Complex(Complex(1,2),Complex(4,8)) ) ) + cnt = [0,0] + for t in constructor_test: + cnt[0] += 1 + if ((t[0][0]!=t[1].re)or(t[0][1]!=t[1].im)): + print " expected", t[0], "got", t[1] + cnt[1] += 1 + print " ", cnt[1], "of", cnt[0], "tests failed" + # test operators testsuite = { 'a+b': [ (1, 10, 11), @@ -285,13 +309,11 @@ (Complex(1), Complex(0,10), 1), ], } - exprs = testsuite.keys() - exprs.sort() - for expr in exprs: + for expr in sorted(testsuite): print expr + ':' t = (expr,) for item in testsuite[expr]: - apply(checkop, t+item) + checkop(*(t+item)) if __name__ == '__main__': From jhylton at users.sourceforge.net Sun Oct 16 07:24:29 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:29 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Demo/xmlrpc xmlrpc_handler.py, 1.2, 1.2.2.1 Message-ID: <20051016052429.C435B1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Demo/xmlrpc In directory 
sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Demo/xmlrpc Modified Files: Tag: ast-branch xmlrpc_handler.py Log Message: Merge head to branch (for the last time) Index: xmlrpc_handler.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Demo/xmlrpc/xmlrpc_handler.py,v retrieving revision 1.2 retrieving revision 1.2.2.1 diff -u -d -r1.2 -r1.2.2.1 --- xmlrpc_handler.py 3 Apr 2002 21:47:47 -0000 1.2 +++ xmlrpc_handler.py 16 Oct 2005 05:23:56 -0000 1.2.2.1 @@ -26,7 +26,7 @@ def handle_request (self, request): [path, params, query, fragment] = request.split_uri() - if request.command in ('post', 'put'): + if request.command.lower() in ('post', 'put'): request.collector = collector (self, request) else: request.error (400) From jhylton at users.sourceforge.net Sun Oct 16 07:24:29 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:29 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Demo/rpc rpc.py, 1.11.30.1, 1.11.30.2 Message-ID: <20051016052429.E3B951E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Demo/rpc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Demo/rpc Modified Files: Tag: ast-branch rpc.py Log Message: Merge head to branch (for the last time) Index: rpc.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Demo/rpc/rpc.py,v retrieving revision 1.11.30.1 retrieving revision 1.11.30.2 diff -u -d -r1.11.30.1 -r1.11.30.2 --- rpc.py 7 Jan 2005 06:56:46 -0000 1.11.30.1 +++ rpc.py 16 Oct 2005 05:23:56 -0000 1.11.30.2 @@ -3,6 +3,8 @@ # XXX There should be separate exceptions for the various reasons why # XXX an RPC can fail, rather than using RuntimeError for everything +# XXX Need to use class based exceptions rather than string exceptions + # XXX The UDP version of the protocol resends requests when it does # XXX not receive a timely reply -- use only for 
idempotent calls! @@ -90,13 +92,13 @@ return (flavor, stuff) def unpack_callheader(self): - xid = self.unpack_uint(xid) + xid = self.unpack_uint() temp = self.unpack_enum() - if temp <> CALL: + if temp != CALL: raise BadRPCFormat, 'no CALL but %r' % (temp,) temp = self.unpack_uint() - if temp <> RPCVERSION: - raise BadRPCVerspion, 'bad RPC version %r' % (temp,) + if temp != RPCVERSION: + raise BadRPCVersion, 'bad RPC version %r' % (temp,) prog = self.unpack_uint() vers = self.unpack_uint() proc = self.unpack_uint() @@ -108,7 +110,7 @@ def unpack_replyheader(self): xid = self.unpack_uint() mtype = self.unpack_enum() - if mtype <> REPLY: + if mtype != REPLY: raise RuntimeError, 'no REPLY but %r' % (mtype,) stat = self.unpack_enum() if stat == MSG_DENIED: @@ -123,7 +125,7 @@ raise RuntimeError, \ 'MSG_DENIED: AUTH_ERROR: %r' % (stat,) raise RuntimeError, 'MSG_DENIED: %r' % (stat,) - if stat <> MSG_ACCEPTED: + if stat != MSG_ACCEPTED: raise RuntimeError, \ 'Neither MSG_DENIED nor MSG_ACCEPTED: %r' % (stat,) verf = self.unpack_auth() @@ -139,7 +141,7 @@ raise RuntimeError, 'call failed: PROC_UNAVAIL' if stat == GARBAGE_ARGS: raise RuntimeError, 'call failed: GARBAGE_ARGS' - if stat <> SUCCESS: + if stat != SUCCESS: raise RuntimeError, 'call failed: %r' % (stat,) return xid, verf # Caller must get procedure-specific part of reply @@ -329,7 +331,7 @@ sock.bind((host, i)) return last_resv_port_tried except socket.error, (errno, msg): - if errno <> 114: + if errno != 114: raise socket.error, (errno, msg) raise RuntimeError, 'can\'t assign reserved port' @@ -348,7 +350,7 @@ u = self.unpacker u.reset(reply) xid, verf = u.unpack_replyheader() - if xid <> self.lastxid: + if xid != self.lastxid: # Can't really happen since this is TCP... 
raise RuntimeError, 'wrong xid in reply %r instead of %r' % ( xid, self.lastxid) @@ -387,7 +389,7 @@ u = self.unpacker u.reset(reply) xid, verf = u.unpack_replyheader() - if xid <> self.lastxid: + if xid != self.lastxid: ## print 'BAD xid' continue break @@ -443,7 +445,7 @@ u = self.unpacker u.reset(reply) xid, verf = u.unpack_replyheader() - if xid <> self.lastxid: + if xid != self.lastxid: ## print 'BAD xid' continue reply = unpack_func() @@ -678,11 +680,11 @@ xid = self.unpacker.unpack_uint() self.packer.pack_uint(xid) temp = self.unpacker.unpack_enum() - if temp <> CALL: + if temp != CALL: return None # Not worthy of a reply self.packer.pack_uint(REPLY) temp = self.unpacker.unpack_uint() - if temp <> RPCVERSION: + if temp != RPCVERSION: self.packer.pack_uint(MSG_DENIED) self.packer.pack_uint(RPC_MISMATCH) self.packer.pack_uint(RPCVERSION) @@ -691,11 +693,11 @@ self.packer.pack_uint(MSG_ACCEPTED) self.packer.pack_auth((AUTH_NULL, make_auth_null())) prog = self.unpacker.unpack_uint() - if prog <> self.prog: + if prog != self.prog: self.packer.pack_uint(PROG_UNAVAIL) return self.packer.get_buf() vers = self.unpacker.unpack_uint() - if vers <> self.vers: + if vers != self.vers: self.packer.pack_uint(PROG_MISMATCH) self.packer.pack_uint(self.vers) self.packer.pack_uint(self.vers) @@ -812,7 +814,7 @@ def session(self): call, host_port = self.sock.recvfrom(8192) reply = self.handle(call) - if reply <> None: + if reply != None: self.sock.sendto(reply, host_port) From jhylton at users.sourceforge.net Sun Oct 16 07:24:30 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/commontex license.tex, 1.6.4.1, 1.6.4.2 Message-ID: <20051016052430.16A0A1E4009@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/commontex In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/commontex Modified Files: Tag: ast-branch license.tex Log Message: Merge 
head to branch (for the last time) Index: license.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/commontex/license.tex,v retrieving revision 1.6.4.1 retrieving revision 1.6.4.2 diff -u -d -r1.6.4.1 -r1.6.4.2 --- license.tex 7 Jan 2005 06:56:53 -0000 1.6.4.1 +++ license.tex 16 Oct 2005 05:23:57 -0000 1.6.4.2 @@ -45,6 +45,9 @@ \linev{2.3.2}{2.3.1}{2003}{PSF}{yes} \linev{2.3.3}{2.3.2}{2003}{PSF}{yes} \linev{2.3.4}{2.3.3}{2004}{PSF}{yes} + \linev{2.3.5}{2.3.4}{2005}{PSF}{yes} + \linev{2.4}{2.3}{2004}{PSF}{yes} + \linev{2.4.1}{2.4}{2005}{PSF}{yes} \end{tablev} \note{GPL-compatible doesn't mean that we're distributing From jhylton at users.sourceforge.net Sun Oct 16 07:24:30 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc ACKS, 1.37.2.2, 1.37.2.3 Makefile, 1.244.2.2, 1.244.2.3 Makefile.deps, 1.87.2.2, 1.87.2.3 Message-ID: <20051016052430.162801E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc Modified Files: Tag: ast-branch ACKS Makefile Makefile.deps Log Message: Merge head to branch (for the last time) Index: ACKS =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ACKS,v retrieving revision 1.37.2.2 retrieving revision 1.37.2.3 diff -u -d -r1.37.2.2 -r1.37.2.3 --- ACKS 7 Jan 2005 06:56:51 -0000 1.37.2.2 +++ ACKS 16 Oct 2005 05:23:56 -0000 1.37.2.3 @@ -23,7 +23,7 @@ A. Amoroso Pehr Anderson Oliver Andrich -Jess Cea Avin +Jesús Cea Avión Daniel Barclay Chris Barker Don Bashford @@ -54,7 +54,7 @@ Carey Evans Martijn Faassen Carl Feynman -Hernn Martnez Foffani +Hernán Martínez Foffani Stefan Franke Jim Fulton Peter Funk @@ -71,7 +71,7 @@ Mark Hammond Harald Hanche-Olsen Manus Hand -Gerhard Hring +Gerhard Häring Travis B. 
Hartwell Janko Hauser Bernhard Herzog @@ -108,11 +108,11 @@ Detlef Lannert Piers Lauder Glyph Lefkowitz -Marc-Andr Lemburg +Marc-André Lemburg Ulf A. Lindgren Everett Lipman Mirko Liss -Martin von Lwis +Martin von Löwis Fredrik Lundh Jeff MacDonald John Machin @@ -136,11 +136,12 @@ William Park Joonas Paalasmaa Harri Pasanen +Bo Peng Tim Peters Christopher Petrilli Justin D. Pettit Chris Phoenix -Franois Pinard +François Pinard Paul Prescod Eric S. Raymond Edward K. Ream @@ -163,6 +164,7 @@ Justin Sheehy Michael Simcich Ionel Simionescu +Gregory P. Smith Roy Smith Clay Spence Nicholas Spies Index: Makefile =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/Makefile,v retrieving revision 1.244.2.2 retrieving revision 1.244.2.3 diff -u -d -r1.244.2.2 -r1.244.2.3 --- Makefile 7 Jan 2005 06:56:51 -0000 1.244.2.2 +++ Makefile 16 Oct 2005 05:23:56 -0000 1.244.2.3 @@ -87,7 +87,7 @@ # (e.g. OpenBSD needs package gmake installed; use gmake instead of make) PWD=$(shell pwd) -# (The trailing colon in the value is needed; TeX places it's default +# (The trailing colon in the value is needed; TeX places its default # set of paths at the location of the empty string in the path list.) 
TEXINPUTS=$(PWD)/commontex: Index: Makefile.deps =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/Makefile.deps,v retrieving revision 1.87.2.2 retrieving revision 1.87.2.3 diff -u -d -r1.87.2.2 -r1.87.2.3 --- Makefile.deps 7 Jan 2005 06:56:52 -0000 1.87.2.2 +++ Makefile.deps 16 Oct 2005 05:23:56 -0000 1.87.2.3 @@ -125,6 +125,7 @@ lib/libmarshal.tex \ lib/libwarnings.tex \ lib/libimp.tex \ + lib/libzipimport.tex \ lib/libpkgutil.tex \ lib/libparser.tex \ lib/libbltin.tex \ @@ -163,6 +164,7 @@ lib/libposix.tex \ lib/libposixpath.tex \ lib/libpwd.tex \ + lib/libspwd.tex \ lib/libgrp.tex \ lib/libcrypt.tex \ lib/libdbm.tex \ @@ -200,6 +202,7 @@ lib/librgbimg.tex \ lib/libossaudiodev.tex \ lib/libcrypto.tex \ + lib/libhashlib.tex \ lib/libmd5.tex \ lib/libsha.tex \ lib/libhmac.tex \ From jhylton at users.sourceforge.net Sun Oct 16 07:24:30 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/doc doc.tex, 1.67.2.2, 1.67.2.3 Message-ID: <20051016052430.8E8681E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/doc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/doc Modified Files: Tag: ast-branch doc.tex Log Message: Merge head to branch (for the last time) Index: doc.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/doc/doc.tex,v retrieving revision 1.67.2.2 retrieving revision 1.67.2.3 diff -u -d -r1.67.2.2 -r1.67.2.3 --- doc.tex 7 Jan 2005 06:56:54 -0000 1.67.2.2 +++ doc.tex 16 Oct 2005 05:23:57 -0000 1.67.2.3 @@ -617,7 +617,7 @@ Documentation for a ``simple'' macro. Simple macros are macros which are used for code expansion, but which do not take arguments so cannot be described as functions. This is not to - be used for simple constant definitions. 
Examples of it's use + be used for simple constant definitions. Examples of its use in the Python documentation include \csimplemacro{PyObject_HEAD} and \csimplemacro{Py_BEGIN_ALLOW_THREADS}. @@ -812,7 +812,7 @@ The name of a ``simple'' macro. Simple macros are macros which are used for code expansion, but which do not take arguments so cannot be described as functions. This is not to - be used for simple constant definitions. Examples of it's use + be used for simple constant definitions. Examples of its use in the Python documentation include \csimplemacro{PyObject_HEAD} and \csimplemacro{Py_BEGIN_ALLOW_THREADS}. @@ -1134,9 +1134,9 @@ \subsection{Module-specific Markup \label{module-markup}} The markup described in this section is used to provide information - about a module being documented. A typical use of this markup - appears at the top of the section used to document a module. A - typical example might look like this: + about a module being documented. Each module should be documented + in its own \macro{section}. A typical use of this markup + appears at the top of that section and might look like this: \begin{verbatim} \section{\module{spam} --- @@ -1878,31 +1878,42 @@ \subsection{Working on Cygwin \label{cygwin}} Installing the required tools under Cygwin under Cygwin can be a - little tedious, if only because many packages are more difficult - to install under Cygwin. + little tedious. Most of the required packages can be installed + using Cygwin's graphical installer, while netpbm and \LaTeX2HTML + must be installed from source. + + Start with a reasonably modern version of Cygwin. If you haven't + upgraded for a few years, now would be a good time. Using the Cygwin installer, make sure your Cygwin installation includes Perl, Python, and the \TeX{} packages. Perl and Python - are located under \menuselection{Interpreters} in the installer. 
- The \TeX{} packages are located in the \menuselection{Text} - section; installing the \code{tetex-beta}, \code{texmf}, - \code{texmf-base}, and \code{texmf-extra} ensures that all the - required packages are available. (There may be a more minimal - set, but I've not spent time trying to minimize the installation.) + are located under the \menuselection{Interpreters} heading. The + \TeX{} packages are located under the \menuselection{Text} + heading, and are named \code{tetex-*}. To ensure that all + required packages are available, install every \code{tetex} + package, except \code{tetex-x11}. (There may be a more minimal + set, but I've not spent time trying to minimize the installation.) The netpbm package is used by \LaTeX2HTML, and \emph{must} be installed before \LaTeX2HTML can be successfully installed, even - though they will never be used for most Python documentation. - References to download locations are located in the \ulink{netpbm - README}{http://netpbm.sourceforge.net/README}. Install according - to the instructions. + though its features will not be used for most Python + documentation. References to download locations are located in + the \ulink{netpbm README}{http://netpbm.sourceforge.net/README}. + Install from the latest stable source distribution according to + the instructions. (Note that binary packages of netpbm are + sometimes available, but these may not work correctly with + \LaTeX2HTML.) \LaTeX2HTML can be installed from the source archive, but only - after munging one of the files in the distribution. Edit the file - \file{L2hos.pm} in the top level of the unpacked distribution; - near the bottom of the file, change the text - \code{\$\textasciicircum{}O} with the text \code{'unix'}. Proceed - using this command to build and install the software: + after munging one of the files in the distribution. 
Download the + source archive from the \LaTeX2HTML website + \url{http://www.latex2html.org/} (or one of the many alternate + sites) and unpack it to a build directory. In the top level of + this build directory there will be a file named \file{L2hos.pm}. + Open \file{L2hos.pm} in an editor, and near the bottom of the file + replace the text \code{\$\textasciicircum{}O} with the text + \code{'unix'}. Proceed using this command to build and install + the software: \begin{verbatim} % ./configure && make install From jhylton at users.sourceforge.net Sun Oct 16 07:24:30 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/api abstract.tex, 1.17.2.2, 1.17.2.3 concrete.tex, 1.17.2.2, 1.17.2.3 init.tex, 1.3.2.2, 1.3.2.3 newtypes.tex, 1.13.2.2, 1.13.2.3 refcounts.dat, 1.43.2.1, 1.43.2.2 utilities.tex, 1.5.2.2, 1.5.2.3 Message-ID: <20051016052430.8EF031E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/api In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/api Modified Files: Tag: ast-branch abstract.tex concrete.tex init.tex newtypes.tex refcounts.dat utilities.tex Log Message: Merge head to branch (for the last time) Index: abstract.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/abstract.tex,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -d -r1.17.2.2 -r1.17.2.3 --- abstract.tex 7 Jan 2005 06:56:52 -0000 1.17.2.2 +++ abstract.tex 16 Oct 2005 05:23:56 -0000 1.17.2.3 @@ -778,12 +778,6 @@ the Python statement \samp{del \var{o}[\var{i1}:\var{i2}]}. \end{cfuncdesc} -\begin{cfuncdesc}{PyObject*}{PySequence_Tuple}{PyObject *o} - Returns the \var{o} as a tuple on success, and \NULL{} on failure. - This is equivalent to the Python expression \samp{tuple(\var{o})}. 
- \bifuncindex{tuple} -\end{cfuncdesc} - \begin{cfuncdesc}{int}{PySequence_Count}{PyObject *o, PyObject *value} Return the number of occurrences of \var{value} in \var{o}, that is, return the number of keys for which \code{\var{o}[\var{key}] == @@ -811,9 +805,11 @@ \begin{cfuncdesc}{PyObject*}{PySequence_Tuple}{PyObject *o} Return a tuple object with the same contents as the arbitrary - sequence \var{o}. If \var{o} is a tuple, a new reference will be - returned, otherwise a tuple will be constructed with the appropriate - contents. + sequence \var{o} or \NULL{} on failure. If \var{o} is a tuple, + a new reference will be returned, otherwise a tuple will be + constructed with the appropriate contents. This is equivalent + to the Python expression \samp{tuple(\var{o})}. + \bifuncindex{tuple} \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PySequence_Fast}{PyObject *o, const char *m} Index: concrete.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/concrete.tex,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -d -r1.17.2.2 -r1.17.2.3 --- concrete.tex 7 Jan 2005 06:56:52 -0000 1.17.2.2 +++ concrete.tex 16 Oct 2005 05:23:56 -0000 1.17.2.3 @@ -36,20 +36,20 @@ \end{cvardesc} \begin{cfuncdesc}{int}{PyType_Check}{PyObject *o} - Returns true if the object \var{o} is a type object, including - instances of types derived from the standard type object. Returns + Return true if the object \var{o} is a type object, including + instances of types derived from the standard type object. Return false in all other cases. \end{cfuncdesc} \begin{cfuncdesc}{int}{PyType_CheckExact}{PyObject *o} [...1572 lines suppressed...] + +\begin{cfuncdesc}{int}{PySet_Discard}{PyObject *set, PyObject *key} + Return 1 if found and removed, 0 if not found (no action taken), + and -1 if an error is encountered. Does not raise \exception{KeyError} + for missing keys. 
Raise a \exception{TypeError} if the \var{key} is + unhashable. Unlike the Python \method{discard()} method, this function + does not automatically convert unhashable sets into temporary frozensets. + Raise \exception{SystemError} if \var{set} is not an instance + of \class{set} or its subtype. +\end{cfuncdesc} + +\begin{cfuncdesc}{PyObject*}{PySet_Pop}{PyObject *set} + Return a new reference to an arbitrary object in the \var{set}, + and remove the object from the \var{set}. Return \NULL{} on + failure. Raise \exception{KeyError} if the set is empty. + Raise a \exception{SystemError} if \var{set} is not an instance + of \class{set} or its subtype. +\end{cfuncdesc} + + Index: init.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/init.tex,v retrieving revision 1.3.2.2 retrieving revision 1.3.2.3 diff -u -d -r1.3.2.2 -r1.3.2.3 --- init.tex 7 Jan 2005 06:56:53 -0000 1.3.2.2 +++ init.tex 16 Oct 2005 05:23:56 -0000 1.3.2.3 @@ -131,6 +131,12 @@ objects may affect the wrong (sub-)interpreter's dictionary of loaded modules. (XXX This is a hard-to-fix bug that will be addressed in a future release.) + + Also note that the use of this functionality is incompatible with + extension modules such as PyObjC and ctypes that use the + \cfunction{PyGILState_*} APIs (and this is inherent in the way the + \cfunction{PyGILState_*} functions work). Simple things may work, + but confusing behavior will always be near. \end{cfuncdesc} \begin{cfuncdesc}{void}{Py_EndInterpreter}{PyThreadState *tstate} @@ -239,9 +245,8 @@ program name (set by \cfunction{Py_SetProgramName()} above) and some environment variables. The returned string consists of a series of directory names separated by a platform dependent delimiter - character. The delimiter character is \character{:} on \UNIX, - \character{;} on Windows, and \character{\e n} (the \ASCII{} - newline character) on Macintosh.
The returned string points into + character. The delimiter character is \character{:} on \UNIX and Mac OS X, + \character{;} on Windows. The returned string points into static storage; the caller should not modify its value. The value is available to Python code as the list \code{sys.path}\withsubitem{(in module sys)}{\ttindex{path}}, which @@ -272,7 +277,7 @@ this is formed from the ``official'' name of the operating system, converted to lower case, followed by the major revision number; e.g., for Solaris 2.x, which is also known as SunOS 5.x, the value - is \code{'sunos5'}. On Macintosh, it is \code{'mac'}. On Windows, + is \code{'sunos5'}. On Mac OS X, it is \code{'darwin'}. On Windows, it is \code{'win'}. The returned string points into static storage; the caller should not modify its value. The value is available to Python code as \code{sys.platform}. @@ -455,19 +460,10 @@ pointer, release the lock, and finally free their thread state data structure. -When creating a thread data structure, you need to provide an -interpreter state data structure. The interpreter state data -structure holds global data that is shared by all threads in an -interpreter, for example the module administration -(\code{sys.modules}). Depending on your needs, you can either create -a new interpreter state data structure, or share the interpreter state -data structure used by the Python main thread (to access the latter, -you must obtain the thread state and access its \member{interp} member; -this must be done by a thread that is created by Python or by the main -thread after Python is initialized). - -Assuming you have access to an interpreter object, the typical idiom -for calling into Python from a C thread is +Beginning with version 2.3, threads can now take advantage of the +\cfunction{PyGILState_*()} functions to do all of the above +automatically. 
The typical idiom for calling into Python from a C +thread is now: \begin{verbatim} PyGILState_STATE gstate; @@ -481,6 +477,13 @@ PyGILState_Release(gstate); \end{verbatim} +Note that the \cfunction{PyGILState_*()} functions assume there is +only one global interpreter (created automatically by +\cfunction{Py_Initialize()}). Python still supports the creation of +additional interpreters (using \cfunction{Py_NewInterpreter()}), but +mixing multiple interpreters and the \cfunction{PyGILState_*()} API is +unsupported. + \begin{ctypedesc}{PyInterpreterState} This data structure represents the state shared by a number of cooperating threads. Threads belonging to the same interpreter @@ -700,16 +703,16 @@ \end{cfuncdesc} \begin{cfuncdesc}{PyGILState_STATE}{PyGILState_Ensure}{} -Ensure that the current thread is ready to call the Python -C API regardless of the current state of Python, or of its -thread lock. This may be called as many times as desired -by a thread as long as each call is matched with a call to -\cfunction{PyGILState_Release()}. -In general, other thread-related APIs may -be used between \cfunction{PyGILState_Ensure()} and \cfunction{PyGILState_Release()} calls as long as the -thread state is restored to its previous state before the Release(). -For example, normal usage of the \csimplemacro{Py_BEGIN_ALLOW_THREADS} -and \csimplemacro{Py_END_ALLOW_THREADS} macros is acceptable. +Ensure that the current thread is ready to call the Python C API +regardless of the current state of Python, or of its thread lock. +This may be called as many times as desired by a thread as long as +each call is matched with a call to \cfunction{PyGILState_Release()}. +In general, other thread-related APIs may be used between +\cfunction{PyGILState_Ensure()} and \cfunction{PyGILState_Release()} +calls as long as the thread state is restored to its previous state +before the Release(). 
For example, normal usage of the +\csimplemacro{Py_BEGIN_ALLOW_THREADS} and +\csimplemacro{Py_END_ALLOW_THREADS} macros is acceptable. The return value is an opaque "handle" to the thread state when \cfunction{PyGILState_Ensure()} was called, and must be passed to Index: newtypes.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/newtypes.tex,v retrieving revision 1.13.2.2 retrieving revision 1.13.2.3 diff -u -d -r1.13.2.2 -r1.13.2.3 --- newtypes.tex 7 Jan 2005 06:56:53 -0000 1.13.2.2 +++ newtypes.tex 16 Oct 2005 05:23:56 -0000 1.13.2.3 @@ -191,7 +191,7 @@ int ob_size; \end{verbatim} Note that \csimplemacro{PyObject_HEAD} is part of the expansion, and - that it's own expansion varies depending on the definition of + that its own expansion varies depending on the definition of \csimplemacro{Py_TRACE_REFS}. \end{csimplemacrodesc} @@ -1106,6 +1106,13 @@ \begin{cmemberdesc}{PyTypeObject}{descrgetfunc}{tp_descr_get} An optional pointer to a "descriptor get" function. + + The function signature is + +\begin{verbatim} +PyObject * tp_descr_get(PyObject *self, PyObject *obj, PyObject *type); +\end{verbatim} + XXX blah, blah. This field is inherited by subtypes. @@ -1114,9 +1121,16 @@ \begin{cmemberdesc}{PyTypeObject}{descrsetfunc}{tp_descr_set} An optional pointer to a "descriptor set" function. - XXX blah, blah. + The function signature is + +\begin{verbatim} +int tp_descr_set(PyObject *self, PyObject *obj, PyObject *value); +\end{verbatim} This field is inherited by subtypes. + + XXX blah, blah.
+ \end{cmemberdesc} \begin{cmemberdesc}{PyTypeObject}{long}{tp_dictoffset} Index: refcounts.dat =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/refcounts.dat,v retrieving revision 1.43.2.1 retrieving revision 1.43.2.2 diff -u -d -r1.43.2.1 -r1.43.2.2 --- refcounts.dat 28 Apr 2003 17:38:11 -0000 1.43.2.1 +++ refcounts.dat 16 Oct 2005 05:23:57 -0000 1.43.2.2 @@ -1009,6 +1009,24 @@ PySequence_Tuple:PyObject*::+1: PySequence_Tuple:PyObject*:o:0: +PySet_Append:int::: +PySet_Append:PyObject*:set:0: +PySet_Append:PyObject*:key:+1: + +PySet_Contains:int::: +PySet_Contains:PyObject*:anyset:0: +PySet_Contains:PyObject*:key:0: + +PySet_Discard:int::: +PySet_Discard:PyObject*:set:0: +PySet_Discard:PyObject*:key:-1:no effect if key not found + +PySet_Pop:PyObject*::0:or returns NULL and raises KeyError if set is empty +PySet_Pop:PyObject*:set:0: + +PySet_Size:int::: +PySet_Size:PyObject*:anyset:0: + PySlice_Check:int::: PySlice_Check:PyObject*:ob:0: Index: utilities.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/api/utilities.tex,v retrieving revision 1.5.2.2 retrieving revision 1.5.2.3 diff -u -d -r1.5.2.2 -r1.5.2.3 --- utilities.tex 7 Jan 2005 06:56:53 -0000 1.5.2.2 +++ utilities.tex 16 Oct 2005 05:23:57 -0000 1.5.2.3 @@ -34,7 +34,7 @@ Return true when the interpreter runs out of stack space. This is a reliable check, but is only available when \constant{USE_STACKCHECK} is defined (currently on Windows using the Microsoft Visual \Cpp{} - compiler and on the Macintosh). \constant{USE_CHECKSTACK} will be + compiler). \constant{USE_STACKCHECK} will be defined automatically; you should never change the definition in your own code. \end{cfuncdesc} @@ -539,7 +539,7 @@ Convert a Python integer to a C \ctype{long int}. 
\item[\samp{k} (integer) {[unsigned long]}] - Convert a Python integer to a C \ctype{unsigned long} without + Convert a Python integer or long integer to a C \ctype{unsigned long} without overflow checking. \versionadded{2.3} \item[\samp{L} (integer) {[PY_LONG_LONG]}] @@ -548,7 +548,7 @@ \ctype{_int64} on Windows). \item[\samp{K} (integer) {[unsigned PY_LONG_LONG]}] - Convert a Python integer to a C \ctype{unsigned long long} + Convert a Python integer or long integer to a C \ctype{unsigned long long} without overflow checking. This format is only available on platforms that support \ctype{unsigned long long} (or \ctype{unsigned _int64} on Windows). \versionadded{2.3} From jhylton at users.sourceforge.net Sun Oct 16 07:24:30 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ext extending.tex, 1.21.2.2, 1.21.2.3 noddy2.c, 1.5.6.1, 1.5.6.2 run-func.c, 1.2.16.1, 1.2.16.2 Message-ID: <20051016052430.A3A0A1E4009@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ext In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/ext Modified Files: Tag: ast-branch extending.tex noddy2.c run-func.c Log Message: Merge head to branch (for the last time) Index: extending.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ext/extending.tex,v retrieving revision 1.21.2.2 retrieving revision 1.21.2.3 diff -u -d -r1.21.2.2 -r1.21.2.3 --- extending.tex 7 Jan 2005 06:57:02 -0000 1.21.2.2 +++ extending.tex 16 Oct 2005 05:23:57 -0000 1.21.2.3 @@ -374,7 +374,7 @@ \cdata{_PyImport_Inittab} table. 
The easiest way to handle this is to statically initialize your statically-linked modules by directly calling \cfunction{initspam()} after the call to -\cfunction{Py_Initialize()} or \cfunction{PyMac_Initialize()}: +\cfunction{Py_Initialize()}: \begin{verbatim} int @@ -426,7 +426,6 @@ (chapter \ref{building}) and additional information that pertains only to building on Windows (chapter \ref{building-on-windows}) for more information about this. -% XXX Add information about Mac OS If you can't use dynamic loading, or if you want to make your module a permanent part of the Python interpreter, you will have to change the @@ -1307,7 +1306,7 @@ /* C API functions */ #define PySpam_System_NUM 0 #define PySpam_System_RETURN int -#define PySpam_System_PROTO (char *command) +#define PySpam_System_PROTO (const char *command) /* Total number of C API pointers */ #define PySpam_API_pointers 1 Index: noddy2.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ext/noddy2.c,v retrieving revision 1.5.6.1 retrieving revision 1.5.6.2 diff -u -d -r1.5.6.1 -r1.5.6.2 --- noddy2.c 7 Jan 2005 06:57:02 -0000 1.5.6.1 +++ noddy2.c 16 Oct 2005 05:23:57 -0000 1.5.6.2 @@ -3,8 +3,8 @@ typedef struct { PyObject_HEAD - PyObject *first; - PyObject *last; + PyObject *first; /* first name */ + PyObject *last; /* last name */ int number; } Noddy; Index: run-func.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ext/run-func.c,v retrieving revision 1.2.16.1 retrieving revision 1.2.16.2 diff -u -d -r1.2.16.1 -r1.2.16.2 --- run-func.c 28 Apr 2003 17:38:05 -0000 1.2.16.1 +++ run-func.c 16 Oct 2005 05:23:57 -0000 1.2.16.2 @@ -20,11 +20,8 @@ Py_DECREF(pName); if (pModule != NULL) { - pDict = PyModule_GetDict(pModule); - /* pDict is a borrowed reference */ - - pFunc = PyDict_GetItemString(pDict, argv[2]); - /* pFun: Borrowed reference */ + pFunc = PyDict_GetItemString(pModule, 
argv[2]); + /* pFunc is a new reference */ if (pFunc && PyCallable_Check(pFunc)) { pArgs = PyTuple_New(argc - 3); @@ -46,18 +43,19 @@ Py_DECREF(pValue); } else { + Py_DECREF(pFunc); Py_DECREF(pModule); PyErr_Print(); fprintf(stderr,"Call failed\n"); return 1; } - /* pDict and pFunc are borrowed and must not be Py_DECREF-ed */ } else { if (PyErr_Occurred()) PyErr_Print(); fprintf(stderr, "Cannot find function \"%s\"\n", argv[2]); } + Py_XDECREF(pFunc); Py_DECREF(pModule); } else { From jhylton at users.sourceforge.net Sun Oct 16 07:24:30 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/dist dist.tex, 1.42.2.2, 1.42.2.3 Message-ID: <20051016052430.B2E8C1E400C@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/dist In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/dist Modified Files: Tag: ast-branch dist.tex Log Message: Merge head to branch (for the last time) Index: dist.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/dist/dist.tex,v retrieving revision 1.42.2.2 retrieving revision 1.42.2.3 diff -u -d -r1.42.2.2 -r1.42.2.3 --- dist.tex 7 Jan 2005 06:56:53 -0000 1.42.2.2 +++ dist.tex 16 Oct 2005 05:23:57 -0000 1.42.2.3 @@ -25,6 +25,9 @@ \begin{document} \maketitle + +\input{copyright} + \begin{abstract} \noindent This document describes the Python Distribution Utilities @@ -298,7 +301,7 @@ current platform before actually using the pathname. This makes your setup script portable across operating systems, which of course is one of the major goals of the Distutils. In this spirit, all pathnames in -this document are slash-separated. (Mac OS programmers should keep in +this document are slash-separated. 
(Mac OS 9 programmers should keep in mind that the \emph{absence} of a leading slash indicates a relative path, the opposite of the Mac OS convention with colons.) @@ -403,7 +406,7 @@ with, etc.). All of this is done through another keyword argument to -\function{setup()}, the \option{extensions} option. \option{extensions} +\function{setup()}, the \option{ext_modules} option. \option{ext_modules} is just a list of \class{Extension} instances, each of which describes a single extension module. Suppose your distribution includes a single extension, called \module{foo} and implemented by \file{foo.c}. If no @@ -631,7 +634,83 @@ will automatically add \code{initmodule} to the list of exported symbols. +\section{Relationships between Distributions and Packages} + +A distribution may relate to packages in three specific ways: + +\begin{enumerate} + \item It can require packages or modules. + + \item It can provide packages or modules. + + \item It can obsolete packages or modules. +\end{enumerate} + +These relationships can be specified using keyword arguments to the +\function{distutils.core.setup()} function. + +Dependencies on other Python modules and packages can be specified by +supplying the \var{requires} keyword argument to \function{setup()}. +The value must be a list of strings. Each string specifies a package +that is required, and optionally what versions are sufficient. + +To specify that any version of a module or package is required, the +string should consist entirely of the module or package name. +Examples include \code{'mymodule'} and \code{'xml.parsers.expat'}. + +If specific versions are required, a sequence of qualifiers can be +supplied in parentheses. Each qualifier may consist of a comparison +operator and a version number. The accepted comparison operators are: + +\begin{verbatim} +< > == +<= >= != +\end{verbatim} + +These can be combined by using multiple qualifiers separated by commas +(and optional whitespace). 
In this case, all of the qualifiers must +be matched; a logical AND is used to combine the evaluations. + +Let's look at a bunch of examples: + +\begin{tableii}{l|l}{code}{Requires Expression}{Explanation} + \lineii{==1.0} {Only version \code{1.0} is compatible} + \lineii{>1.0, !=1.5.1, <2.0} {Any version after \code{1.0} and before + \code{2.0} is compatible, except + \code{1.5.1}} +\end{tableii} + +Now that we can specify dependencies, we also need to be able to +specify what we provide that other distributions can require. This is +done using the \var{provides} keyword argument to \function{setup()}. +The value for this keyword is a list of strings, each of which names a +Python module or package, and optionally identifies the version. If +the version is not specified, it is assumed to match that of the +distribution. + +Some examples: + +\begin{tableii}{l|l}{code}{Provides Expression}{Explanation} + \lineii{mypkg} {Provide \code{mypkg}, using the distribution version} + \lineii{mypkg (1.1)} {Provide \code{mypkg} version 1.1, regardless of the + distribution version} +\end{tableii} + +A package can declare that it obsoletes other packages using the +\var{obsoletes} keyword argument. The value for this is similar to +that of the \var{requires} keyword: a list of strings giving module or +package specifiers. Each specifier consists of a module or package +name optionally followed by one or more version qualifiers. Version +qualifiers are given in parentheses after the module or package name. + +The versions identified by the qualifiers are those that are obsoleted +by the distribution being described. If no qualifiers are given, all +versions of the named module or package are understood to be +obsoleted. + + \section{Installing Scripts} + So far we have been dealing with pure and non-pure Python modules, which are usually not run by themselves but imported by scripts.
@@ -1021,7 +1100,6 @@ script or config file), \command{sdist} creates the archive of the default format for the current platform. The default format is a gzip'ed tar file (\file{.tar.gz}) on \UNIX, and ZIP file on Windows. -\XXX{no Mac OS support here} You can specify as many formats as you like using the \longprogramopt{formats} option, for example: @@ -1579,7 +1657,7 @@ iconpath\optional{, iconindex}}}}} This function creates a shortcut. \var{target} is the path to the program to be started by the shortcut. - \var{description} is the description of the sortcut. + \var{description} is the description of the shortcut. \var{filename} is the title of the shortcut that the user will see. \var{arguments} specifies the command line arguments, if any. \var{workdir} is the working directory for the program. @@ -1640,7 +1718,43 @@ versions, the Hidden property should be set to yes. This must be edited through the web interface. +\section{The .pypirc file} +\label{pypirc} + +The format of the \file{.pypirc} file is as follows: + +\begin{verbatim} +[server-login] +repository: +username: +password: +\end{verbatim} + +\var{repository} can be omitted and defaults to +\code{http://www.python.org/pypi}. + +\chapter{Uploading Packages to the Package Index} +\label{package-upload} + +The Python Package Index (PyPI) not only stores the package info, but also +the package data if the author of the package wishes to. The distutils +command \command{upload} pushes the distribution files to PyPI. + +The command is invoked immediately after building one or more distribution +files. For example, the command +\begin{verbatim} +python setup.py sdist bdist_wininst upload +\end{verbatim} + +will cause the source distribution and the Windows installer to be +uploaded to PyPI.
Note that these will be uploaded even if they are +built using an earlier invocation of \file{setup.py}, but that only +distributions named on the command line for the invocation including +the \command{upload} command are uploaded. + +The \command{upload} command uses the username and password stored in +the file \file{\$HOME/.pypirc}, see section~\ref{pypirc}. \chapter{Examples} \label{examples} @@ -1980,6 +2094,14 @@ implemented by the class \class{distcmds.bdist_openpkg.bdist_openpkg} or \class{buildcmds.bdist_openpkg.bdist_openpkg}. +\section{Adding new distribution types} + +Commands that create distributions (files in the \file{dist/} +directory) need to add \code{(\var{command}, \var{filename})} pairs to +\code{self.distribution.dist_files} so that \command{upload} can +upload it to PyPI. The \var{filename} in the pair contains no path +information, only the name of the file itself. In dry-run mode, pairs +should still be added to represent what would have been created. \chapter{Command Reference} \label{reference} @@ -2059,9 +2181,9 @@ characters in \var{range} (e.g., \code{a-z}, \code{a-zA-Z}, \code{a-f0-9\_.}). The definition of ``regular filename character'' is platform-specific: on \UNIX{} it is anything except slash; on Windows -anything except backslash or colon; on Mac OS anything except colon. +anything except backslash or colon; on Mac OS 9 anything except colon. -\XXX{Windows and Mac OS support not there yet} +\XXX{Windows support not there yet} %\section{Creating a built distribution: the @@ -2135,9 +2257,9 @@ the contents of the config files or command-line. \var{script_name} is a file that will be run with \function{execfile()} -\var{sys.argv[0]} will be replaced with \var{script} for the duration of the +\code{sys.argv[0]} will be replaced with \var{script} for the duration of the call. 
\var{script_args} is a list of strings; if supplied, -\var{sys.argv[1:]} will be replaced by \var{script_args} for the duration +\code{sys.argv[1:]} will be replaced by \var{script_args} for the duration of the call. \var{stop_after} tells \function{setup()} when to stop processing; possible @@ -2172,7 +2294,7 @@ \begin{classdesc*}{Extension} The Extension class describes a single C or \Cpp extension module in a -setup script. It accepts the following keyword arguments in it's +setup script. It accepts the following keyword arguments in its constructor \begin{tableiii}{c|l|l}{argument name}{argument name}{value}{type} @@ -2232,7 +2354,7 @@ \end{classdesc*} \begin{classdesc*}{Command} -A \class{Command} class (or rather, an instance of one of it's subclasses) +A \class{Command} class (or rather, an instance of one of its subclasses) implement a single distutils command. \end{classdesc*} @@ -2258,22 +2380,24 @@ \end{funcdesc} \begin{funcdesc}{gen_preprocess_options}{macros, include_dirs} -Generate C pre-processor options (-D, -U, -I) as used by at least +Generate C pre-processor options (\programopt{-D}, \programopt{-U}, +\programopt{-I}) as used by at least two types of compilers: the typical \UNIX{} compiler and Visual \Cpp. -\var{macros} is the usual thing, a list of 1- or 2-tuples, where \var{(name,)} -means undefine (-U) macro \var{name}, and \var{(name,value)} means define (-D) -macro \var{name} to \var{value}. \var{include_dirs} is just a list of directory -names to be added to the header file search path (-I). Returns a list -of command-line options suitable for either \UNIX{} compilers or Visual -\Cpp. +\var{macros} is the usual thing, a list of 1- or 2-tuples, where +\code{(\var{name},)} means undefine (\programopt{-U}) macro \var{name}, +and \code{(\var{name}, \var{value})} means define (\programopt{-D}) +macro \var{name} to \var{value}. \var{include_dirs} is just a list of +directory names to be added to the header file search path (\programopt{-I}). 
+Returns a list of command-line options suitable for either \UNIX{} compilers +or Visual \Cpp. \end{funcdesc} \begin{funcdesc}{get_default_compiler}{osname, platform} Determine the default compiler to use for the given platform. -\var{osname} should be one of the standard Python OS names (i.e. the -ones returned by \var{os.name}) and \var{platform} the common value -returned by \var{sys.platform} for the platform in question. +\var{osname} should be one of the standard Python OS names (i.e.\ the +ones returned by \code{os.name}) and \var{platform} the common value +returned by \code{sys.platform} for the platform in question. The default values are \code{os.name} and \code{sys.platform} in case the parameters are not given. @@ -2319,7 +2443,7 @@ (don't actually execute the steps) and \var{force} (rebuild everything, regardless of dependencies). All of these flags default to \code{0} (off). Note that you probably don't want to instantiate -\class{CCompiler} or one of it's subclasses directly - use the +\class{CCompiler} or one of its subclasses directly - use the \function{distutils.CCompiler.new_compiler()} factory function instead. @@ -2505,7 +2629,8 @@ \file{build/foo/bar.o}. \var{macros}, if given, must be a list of macro definitions. A macro -definition is either a \var{(name, value)} 2-tuple or a \var{(name,)} 1-tuple. +definition is either a \code{(\var{name}, \var{value})} 2-tuple or a +\code{(\var{name},)} 1-tuple. The former defines a macro; if the value is \code{None}, the macro is defined without an explicit value. The 1-tuple case undefines a macro. Later definitions/redefinitions/undefinitions take @@ -2518,7 +2643,7 @@ \var{debug} is a boolean; if true, the compiler will be instructed to output debug symbols in (or alongside) the object file(s). -\var{extra_preargs} and \var{extra_postargs} are implementation- dependent. +\var{extra_preargs} and \var{extra_postargs} are implementation-dependent. 
On platforms that have the notion of a command-line (e.g. \UNIX, DOS/Windows), they are most likely lists of strings: extra command-line arguments to prepend/append to the compiler command @@ -2759,7 +2884,8 @@ \modulesynopsis{Metrowerks CodeWarrior support} Contains \class{MWerksCompiler}, an implementation of the abstract -\class{CCompiler} class for MetroWerks CodeWarrior on the Macintosh. Needs work to support CW on Windows. +\class{CCompiler} class for MetroWerks CodeWarrior on the pre-Mac OS X Macintosh. +Needs work to support CW on Windows or Mac OS X. %\subsection{Utility modules} @@ -2791,8 +2917,8 @@ \end{funcdesc} \begin{funcdesc}{make_tarball}{base_name, base_dir\optional{, compress=\code{'gzip'}, verbose=\code{0}, dry_run=\code{0}}}'Create an (optional compressed) archive as a tar file from all files in and under \var{base_dir}. \var{compress} must be \code{'gzip'} (the default), -\code{'compress'}, \code{'bzip2'}, or \code{None}. Both \code{'tar'} -and the compression utility named by \var{'compress'} must be on the +\code{'compress'}, \code{'bzip2'}, or \code{None}. Both \program{tar} +and the compression utility named by \var{compress} must be on the default program search path, so this is probably \UNIX-specific. The output tar file will be named \file{\var{base_dir}.tar}, possibly plus the appropriate compression extension (\file{.gz}, \file{.bz2} or @@ -2881,7 +3007,7 @@ Copy an entire directory tree \var{src} to a new location \var{dst}. Both \var{src} and \var{dst} must be directory names. If \var{src} is not a directory, raise \exception{DistutilsFileError}. If \var{dst} does -not exist, it is created with \var{mkpath()}. The end result of the +not exist, it is created with \function{mkpath()}. The end result of the copy is that every file in \var{src} is copied to \var{dst}, and directories under \var{src} are recursively copied to \var{dst}. 
Return the list of files that were copied or might have been copied, @@ -2901,7 +3027,7 @@ \begin{funcdesc}{remove_tree}{directory\optional{verbose=\code{0}, dry_run=\code{0}}} Recursively remove \var{directory} and all files and directories underneath -it. Any errors are ignored (apart from being reported to \code{stdout} if +it. Any errors are ignored (apart from being reported to \code{sys.stdout} if \var{verbose} is true). \end{funcdesc} @@ -2929,7 +3055,7 @@ to \code{'hard'} or \code{'sym'}; if it is \code{None} (the default), files are copied. Don't set \var{link} on systems that don't support it: \function{copy_file()} doesn't check if hard or symbolic linking is -available. It uses \var{_copy_file_contents()} to copy file contents. +available. It uses \function{_copy_file_contents()} to copy file contents. Return a tuple \samp{(dest_name, copied)}: \var{dest_name} is the actual name of the output file, and \var{copied} is true if the file was copied @@ -2999,7 +3125,7 @@ Return \var{pathname} with \var{new_root} prepended. If \var{pathname} is relative, this is equivalent to \samp{os.path.join(new_root,pathname)} Otherwise, it requires making \var{pathname} relative and then joining the -two, which is tricky on DOS/Windows and Mac OS. +two, which is tricky on DOS/Windows. \end{funcdesc} \begin{funcdesc}{check_environ}{} @@ -3197,7 +3323,7 @@ The option_table is a list of 3-tuples: \samp{(long_option, short_option, help_string)} -If an option takes an argument, it's \var{long_option} should have \code{'='} +If an option takes an argument, its \var{long_option} should have \code{'='} appended; \var{short_option} should just be a single character, no \code{':'} in any case. \var{short_option} should be \code{None} if a \var{long_option} doesn't have a corresponding \var{short_option}. All option tuples must have @@ -3294,11 +3420,11 @@ something that provides \method{readline()} and \method{close()} methods). 
It is recommended that you supply at least \var{filename}, so that \class{TextFile} can include it in warning messages. If -\var{file} is not supplied, TextFile creates its own using the -\var{open()} builtin. +\var{file} is not supplied, \class{TextFile} creates its own using the +\function{open()} built-in function. The options are all boolean, and affect the values returned by -\var{readline()} +\method{readline()} \begin{tableiii}{c|l|l}{option name}{option name}{description}{default} \lineiii{strip_comments}{ From jhylton at users.sourceforge.net Sun Oct 16 07:24:30 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/inst inst.tex, 1.40.2.2, 1.40.2.3 Message-ID: <20051016052430.D17D61E4010@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/inst In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/inst Modified Files: Tag: ast-branch inst.tex Log Message: Merge head to branch (for the last time) Index: inst.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/inst/inst.tex,v retrieving revision 1.40.2.2 retrieving revision 1.40.2.3 diff -u -d -r1.40.2.2 -r1.40.2.3 --- inst.tex 7 Jan 2005 06:57:03 -0000 1.40.2.2 +++ inst.tex 16 Oct 2005 05:23:57 -0000 1.40.2.3 @@ -142,7 +142,7 @@ On \UNIX, you'd run this command from a shell prompt; on Windows, you have to open a command prompt window (``DOS box'') and do it there; on -Mac OS, things are a tad more complicated (see below). +Mac OS X, you open a \command{Terminal} window to get a shell prompt. \subsection{Platform variations} @@ -262,7 +262,8 @@ \code{setup.py install}---then the \command{install} command installs to the standard location for third-party Python modules. This location varies by platform and by how you built/installed Python itself. 
On -\UNIX{} and Mac OS, it also depends on whether the module distribution +\UNIX{} (and Mac OS X, which is also Unix-based), +it also depends on whether the module distribution being installed is pure Python or contains extensions (``non-pure''): \begin{tableiv}{l|l|l|c}{textrm}% {Platform}{Standard installation location}{Default value}{Notes} @@ -278,14 +279,6 @@ {\filenq{\filevar{prefix}}} {\filenq{C:\textbackslash{}Python}} {(2)} - \lineiv{Mac OS (pure)} - {\filenq{\filevar{prefix}:Lib:site-packages}} - {\filenq{Python:Lib:site-packages}} - {} - \lineiv{Mac OS (non-pure)} - {\filenq{\filevar{prefix}:Lib:site-packages}} - {\filenq{Python:Lib:site-packages}} - {} \end{tableiv} \noindent Notes: @@ -302,8 +295,8 @@ \filevar{prefix} and \filevar{exec-prefix} stand for the directories that Python is installed to, and where it finds its libraries at -run-time. They are always the same under Windows and Mac OS, and very -often the same under \UNIX. You can find out what your Python +run-time. They are always the same under Windows, and very +often the same under \UNIX{} and Mac OS X. You can find out what your Python installation uses for \filevar{prefix} and \filevar{exec-prefix} by running Python in interactive mode and typing a few simple commands. Under \UNIX, just type \code{python} at the shell prompt. Under @@ -658,7 +651,7 @@ variables supplied by the Distutils are the only ones you can use.) See section~\ref{config-files} for details. -% XXX need some Windows and Mac OS examples---when would custom +% XXX need some Windows examples---when would custom % installation schemes be needed on those platforms? @@ -764,8 +757,8 @@ \label{config-filenames} The names and locations of the configuration files vary slightly across -platforms. On \UNIX, the three configuration files (in the order they -are processed) are: +platforms.
On \UNIX{} and Mac OS X, the three configuration files (in +the order they are processed) are: \begin{tableiii}{l|l|c}{textrm} {Type of file}{Location and filename}{Notes} \lineiii{system}{\filenq{\filevar{prefix}/lib/python\filevar{ver}/distutils/distutils.cfg}}{(1)} @@ -773,7 +766,7 @@ \lineiii{local}{\filenq{setup.cfg}}{(3)} \end{tableiii} -On Windows, the configuration files are: +And on Windows, the configuration files are: \begin{tableiii}{l|l|c}{textrm} {Type of file}{Location and filename}{Notes} \lineiii{system}{\filenq{\filevar{prefix}\textbackslash{}Lib\textbackslash{}distutils\textbackslash{}distutils.cfg}}{(4)} @@ -781,14 +774,6 @@ \lineiii{local}{\filenq{setup.cfg}}{(3)} \end{tableiii} -And on Mac OS, they are: -\begin{tableiii}{l|l|c}{textrm} - {Type of file}{Location and filename}{Notes} - \lineiii{system}{\filenq{\filevar{prefix}:Lib:distutils:distutils.cfg}}{(6)} - \lineiii{personal}{N/A}{} - \lineiii{local}{\filenq{setup.cfg}}{(3)} -\end{tableiii} - \noindent Notes: \begin{description} \item[(1)] Strictly speaking, the system-wide configuration file lives @@ -818,9 +803,6 @@ defined, no personal configuration file will be found or used. (In other words, the Distutils make no attempt to guess your home directory on Windows.) -\item[(6)] (See also notes (1) and (4).) The default installation - prefix is just \file{Python:}, so under Python 1.6 and later this is - normally\file{Python:Lib:distutils:distutils.cfg}. 
\end{description} From jhylton at users.sourceforge.net Sun Oct 16 07:24:31 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:31 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/perl l2hinit.perl, 1.63.2.2, 1.63.2.3 Message-ID: <20051016052431.525A91E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/perl In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/perl Modified Files: Tag: ast-branch l2hinit.perl Log Message: Merge head to branch (for the last time) Index: l2hinit.perl =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/perl/l2hinit.perl,v retrieving revision 1.63.2.2 retrieving revision 1.63.2.3 diff -u -d -r1.63.2.2 -r1.63.2.3 --- l2hinit.perl 7 Jan 2005 06:57:32 -0000 1.63.2.2 +++ l2hinit.perl 16 Oct 2005 05:23:58 -0000 1.63.2.3 @@ -98,7 +98,7 @@ $dir =~ s/$dd$//; $TEXINPUTS = "$dir$envkey$mytexinputs"; # Push everything into $TEXINPUTS since LaTeX2HTML doesn't pick - # this up on it's own; we clear $ENV{'TEXINPUTS'} so the value set + # this up on its own; we clear $ENV{'TEXINPUTS'} so the value set # for this by the main LaTeX2HTML script doesn't contain duplicate # directories. 
if ($ENV{'TEXINPUTS'}) { From jhylton at users.sourceforge.net Sun Oct 16 07:24:31 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:31 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/tools getpagecounts, 1.8.2.1, 1.8.2.2 mkpkglist, 1.1.8.2, 1.1.8.3 py2texi.el, 1.2.2.2, 1.2.2.3 Message-ID: <20051016052431.035AC1E4015@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/tools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/tools Modified Files: Tag: ast-branch getpagecounts mkpkglist py2texi.el Log Message: Merge head to branch (for the last time) Index: getpagecounts =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/tools/getpagecounts,v retrieving revision 1.8.2.1 retrieving revision 1.8.2.2 diff -u -d -r1.8.2.1 -r1.8.2.2 --- getpagecounts 7 Jan 2005 06:57:36 -0000 1.8.2.1 +++ getpagecounts 16 Oct 2005 05:23:58 -0000 1.8.2.2 @@ -65,7 +65,7 @@ of it! 
To locate published copies of the larger manuals, or other Python reference material, consult the Python Bookstore at: - http://www.python.org/cgi-bin/moinmoin/PythonBooks + http://wiki.python.org/moin/PythonBooks The following manuals are included in this package: """ Index: mkpkglist =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/tools/mkpkglist,v retrieving revision 1.1.8.2 retrieving revision 1.1.8.3 diff -u -d -r1.1.8.2 -r1.1.8.3 --- mkpkglist 7 Jan 2005 06:57:37 -0000 1.1.8.2 +++ mkpkglist 16 Oct 2005 05:23:58 -0000 1.1.8.3 @@ -67,14 +67,17 @@ have_tgz = isfile(tgz_fn) have_bz2 = isfile(bz2_fn) - if have_zip or have_tgz or have_bz2: - print " %s" % name - - print get_file_cell(prefix, ".zip", have_zip) - print get_file_cell(prefix, ".tgz", have_tgz) - print get_file_cell(prefix, ".tar.bz2", have_bz2) + have_some = have_zip or have_tgz or have_bz2 - print " " + if not have_some: + print " " print '''\ Index: py2texi.el =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/tools/py2texi.el,v retrieving revision 1.2.2.2 retrieving revision 1.2.2.3 diff -u -d -r1.2.2.2 -r1.2.2.3 --- py2texi.el 7 Jan 2005 06:57:37 -0000 1.2.2.2 +++ py2texi.el 16 Oct 2005 05:23:58 -0000 1.2.2.3 @@ -238,7 +238,7 @@ ("dataline" 1 (progn (setq findex t) "@item \\1\n at findex \\1\n")) ("date" 1 "\\1") ("declaremodule" 2 (progn (setq cindex t) "@label{\\2}@cindex{\\2}")) - ("deprecated" 2 "@emph{This is deprecated in Python \\1. \\2}") + ("deprecated" 2 "@emph{This is deprecated in Python \\1. 
\\2}\n\n") ("dfn" 1 "@dfn{\\1}") ("documentclass" 1 py2texi-magic) ("e" 0 "@backslash{}") @@ -260,6 +260,7 @@ ("funclineni" 2 "@item \\1 \\2") ("function" 1 "@code{\\1}") ("grammartoken" 1 "@code{\\1}") + ("guilabel" 1 "@strong{\\1}") ("hline" 0 "") ("ifhtml" 0 (concat "@" (setq last-if "ifinfo"))) ("iftexi" 0 (concat "@" (setq last-if "ifinfo"))) @@ -278,6 +279,7 @@ ("large" 0 "") ("ldots" 0 "@dots{}") ("leftline" 1 "\\1") + ("leq" 0 "<=") ("lineii" 2 "@item \\1 @tab \\2") ("lineiii" 3 "@item \\1 @tab \\2 @tab \\3") ("lineiv" 4 "@item \\1 @tab \\2 @tab \\3 @tab \\4") @@ -353,6 +355,7 @@ (py2texi-backslash-quote (match-string 2 str))) "@node \\1\n@section \\1\n")))) ("sectionauthor" 2 "") + ("seelink" 3 "\n@table @url\n@item @strong{\\1}\n(\\2)\n\\3\n@end table\n") ("seemodule" 2 "@ref{\\1} \\2") ("seepep" 3 "\n@table @strong\n@item PEP\\1 \\2\n\\3\n@end table\n") ("seerfc" 3 "\n@table @strong\n@item RFC\\1 \\2\n\\3\n@end table\n") @@ -378,7 +381,15 @@ ("textasciitilde" 0 "~") ("textasciicircum" 0 "^") ("textbackslash" 0 "@backslash{}") + ("textbar" 0 "|") + ; Some common versions of Texinfo don't support @euro yet: + ; ("texteuro" 0 "@euro{}") + ; Unfortunately, this alternate spelling doesn't actually apply to + ; the usage found in Python Tutorial, which actually requires a + ; Euro symbol to make sense, so this is commented out as well.
+ ; ("texteuro" 0 "Euro ") ("textgreater" 0 ">") + ("textit" 1 "@i{\\1}") ("textless" 0 "<") ("textrm" 1 "\\1") ("texttt" 1 "@code{\\1}") From jhylton at users.sourceforge.net Sun Oct 16 07:24:31 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:31 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Grammar Grammar, 1.47.2.3, 1.47.2.4 Message-ID: <20051016052431.E980A1E4014@bag.python.org> Update of /cvsroot/python/python/dist/src/Grammar In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Grammar Modified Files: Tag: ast-branch Grammar Log Message: Merge head to branch (for the last time) Index: Grammar =================================================================== RCS file: /cvsroot/python/python/dist/src/Grammar/Grammar,v retrieving revision 1.47.2.3 retrieving revision 1.47.2.4 diff -u -d -r1.47.2.3 -r1.47.2.4 --- Grammar 15 Apr 2005 02:18:25 -0000 1.47.2.3 +++ Grammar 16 Oct 2005 05:23:58 -0000 1.47.2.4 @@ -39,7 +39,7 @@ stmt: simple_stmt | compound_stmt simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | exec_stmt | assert_stmt -expr_stmt: testlist (augassign testlist | ('=' testlist)*) +expr_stmt: testlist (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist))*) augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//=' # For normal assignments, additional restrictions enforced by the interpreter print_stmt: 'print' ( [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ] ) @@ -49,7 +49,7 @@ break_stmt: 'break' continue_stmt: 'continue' return_stmt: 'return' [testlist] -yield_stmt: 'yield' testlist +yield_stmt: yield_expr raise_stmt: 'raise' [test [',' test [',' test]]] import_stmt: import_name | import_from import_name: 'import' dotted_as_names @@ -86,7 +86,7 @@ term: factor (('*'|'/'|'%'|'//') factor)* factor: ('+'|'-'|'~') factor | 
power power: atom trailer* ['**' factor] -atom: '(' [testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+ +atom: '(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+ listmaker: test ( list_for | (',' test)* [','] ) testlist_gexp: test ( gen_for | (',' test)* [','] ) lambdef: 'lambda' [varargslist] ':' test @@ -116,3 +116,6 @@ # not used in grammar, but may appear in "node" passed from Parser to Compiler encoding_decl: NAME + +yield_expr: 'yield' [testlist] + From jhylton at users.sourceforge.net Sun Oct 16 07:24:32 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/texinputs underscore.sty, NONE, 1.1.2.2 howto.cls, 1.13.2.1, 1.13.2.2 manual.cls, 1.18.2.1, 1.18.2.2 python.sty, 1.96.2.2, 1.96.2.3 Message-ID: <20051016052432.404201E400D@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/texinputs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/texinputs Modified Files: Tag: ast-branch howto.cls manual.cls python.sty Added Files: Tag: ast-branch underscore.sty Log Message: Merge head to branch (for the last time) --- NEW FILE: underscore.sty --- % underscore.sty 12-Oct-2001 Donald Arseneau asnd at triumf.ca % Make the "_" character print as "\textunderscore" in text. % Copyright 1998,2001 Donald Arseneau; Distribute freely if unchanged. % Instructions follow after the definitions. \ProvidesPackage{underscore}[2001/10/12] \begingroup \catcode`\_=\active \gdef_{% \relax % No relax gives a small vulnerability in alignments \ifx\if at safe@actives\iftrue % must be outermost test! 
\string_% \else \ifx\protect\@typeset at protect \ifmmode \sb \else \BreakableUnderscore \fi \else \ifx\protect\@unexpandable at protect \noexpand_% \else \protect_% \fi\fi \fi} \endgroup % At begin: set catcode; fix \long \ttdefault so I can use it in comparisons; \AtBeginDocument{% {\immediate\write\@auxout{\catcode\number\string`\_ \string\active}}% \catcode\string`\_\string=\active \edef\ttdefault{\ttdefault}% } \newcommand{\BreakableUnderscore}{\leavevmode\nobreak\hskip\z at skip \ifx\f at family\ttdefault \string_\else \textunderscore\fi \usc at dischyph\nobreak\hskip\z at skip} \DeclareRobustCommand{\_}{% \ifmmode \nfss at text{\textunderscore}\else \BreakableUnderscore \fi} \let\usc at dischyph\@dischyph \DeclareOption{nohyphen}{\def\usc at dischyph{\discretionary{}{}{}}} \DeclareOption{strings}{\catcode`\_=\active} \ProcessOptions \ifnum\catcode`\_=\active\else \endinput \fi %%%%%%%% Redefine commands that use character strings %%%%%%%% \@ifundefined{UnderscoreCommands}{\let\UnderscoreCommands\@empty}{} \expandafter\def\expandafter\UnderscoreCommands\expandafter{% \UnderscoreCommands \do\include \do\includeonly \do\@input \do\@iinput \do\InputIfFileExists \do\ref \do\pageref \do\newlabel \do\bibitem \do\@bibitem \do\cite \do\nocite \do\bibcite } % Macro to redefine a macro to pre-process its string argument % with \protect -> \string. \def\do#1{% Avoid double processing if user includes command twice! \@ifundefined{US\string_\expandafter\@gobble\string#1}{% \edef\@tempb{\meaning#1}% Check if macro is just a protection shell... 
\def\@tempc{\protect}% \edef\@tempc{\meaning\@tempc\string#1\space\space}% \ifx\@tempb\@tempc % just a shell: hook into the protected inner command \expandafter\do \csname \expandafter\@gobble\string#1 \expandafter\endcsname \else % Check if macro takes an optional argument \def\@tempc{\@ifnextchar[}% \edef\@tempa{\def\noexpand\@tempa####1\meaning\@tempc}% \@tempa##2##3\@tempa{##2\relax}% \edef\@tempb{\meaning#1\meaning\@tempc}% \edef\@tempc{\noexpand\@tempd \csname US\string_\expandafter\@gobble\string#1\endcsname}% \if \expandafter\@tempa\@tempb \relax 12\@tempa % then no optional arg \@tempc #1\US at prot \else % There is optional arg \@tempc #1\US at protopt \fi \fi }{}} \def\@tempd#1#2#3{\let#1#2\def#2{#3#1}} \def\US at prot#1#2{\let\@@protect\protect \let\protect\string \edef\US at temp##1{##1{#2}}\restore at protect\US at temp#1} \def\US at protopt#1{\@ifnextchar[{\US at protarg#1}{\US at prot#1}} \def\US at protarg #1[#2]{\US at prot{{#1[#2]}}} \UnderscoreCommands \let\do\relax \let\@tempd\relax % un-do %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \endinput underscore.sty 12-Oct-2001 Donald Arseneau Features: ~~~~~~~~~ \_ prints an underscore so that the hyphenation of constituent words is not affected and hyphenation is permitted after the underscore. For example, "compound\_fracture" hyphenates as com- pound_- frac- ture. If you prefer the underscore to break without a hyphen (but still with the same rules for explicit hyphen-breaks) then use the [nohyphen] package option. A simple _ acts just like \_ in text mode, but makes a subscript in math mode: activation_energy $E_a$ Both forms use an underscore character if the font encoding contains one (e.g., "\usepackage[T1]{fontenc}" or typewriter fonts in any encoding), but they use a rule if the there is no proper character. Deficiencies: ~~~~~~~~~~~~~ The skips and penalties ruin any kerning with the underscore character (when a character is used). 
However, there doesn't seem to be much, if any, such kerning in the ec fonts, and there is never any kerning with a rule. You must avoid "_" in file names and in cite or ref tags, or you must use the babel package, with its active-character controls, or you must give the [strings] option, which attempts to redefine several commands (and may not work perfectly). Even without the [strings] option or babel, you can use occasional underscores like: "\include{file\string_name}". Option: [strings] ~~~~~~~~~~~~~~~~~ The default operation is quite simple and needs no customization; but you must avoid using "_" in any place where LaTeX uses an argument as a string of characters for some control function or as a name. These include the tags for \cite and \ref, file names for \input, \include, and \includegraphics, environment names, counter names, and placement parameters (like "[t]"). The problem with these contexts is that they are `moving arguments' but LaTeX does not `switch on' the \protect mechanism for them. If you need to use the underscore character in these places, the package option [strings] is provided to redefine commands taking a string argument so that the argument is protected (with \protect -> \string). The list of commands is given in "\UnderscoreCommands", with "\do" before each, covering \cite, \ref, \input, and their variants. Not included are many commands regarding font names, everything with counter names, environment names, page styles, and versions of \ref and \cite defined by external packages (e.g. \vref and \citeyear). You can add to the list of supported commands by defining \UnderscoreCommands before loading this package; e.g. \usepackage{chicago} \newcommand{\UnderscoreCommands}{% (\cite already done) \do\citeNP \do\citeA \do\citeANP \do\citeN \do\shortcite \do\shortciteNP \do\shortciteA \do\shortciteANP \do\shortciteN \do\citeyear \do\citeyearNP } \usepackage[strings]{underscore} Not all commands can be supported this way! 
Only commands that take a string argument *first* can be protected. One optional argument before the string argument is also permitted, as exemplified by \cite: both \cite{tags} and \cite[text]{tags} are allowed. A command like \@addtoreset which takes two counter names as arguments could not be protected by adding it to \UnderscoreCommands. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! When you use the [strings] option, you must load this package !! !! last (or nearly last). !! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! There are two reasons: 1) The redefinitions done for protection must come after other packages define their customized versions of those commands. 2) The [strings] option requires the _ character to be activated immediately in order for the cite and ref tags to be read properly from the .aux file as plain strings, and this catcode setting might disrupt other packages. The babel package implements a protection mechanism for many commands, and will be a complete fix for most documents without the [strings] option. Many add-on packages are compatible with babel, so they will get the strings protection also. However, there are several commands that are not covered by babel, but can easily be supported by the [strings] and \UnderscoreCommands mechanism. Beware that using both [strings] and babel may lead to conflicts, but does appear to work (load babel last). Implementation Notes: ~~~~~~~~~~~~~~~~~~~~~ The first setting of "_" to be an active character is performed in a local group so as to not interfere with other packages. The catcode setting is repeated with \AtBeginDocument so the definition is in effect for the text. However, the catcode setting is repeated immediately when the [strings] option is detected. 
The definition of the active "_" is essentially: \ifmmode \sb \else \BreakableUnderscore \fi where "\sb" retains the normal subscript meaning of "_" and where "\BreakableUnderscore" is essentially "\_". The rest of the definition handles the "\protect"ion without causing \relax to be inserted before the character. \BreakableUnderscore uses "\nobreak\hskip\z@skip" to separate the underscore from surrounding words, thus allowing TeX to hyphenate them, but preventing free breaks around the underscore. Next, it checks the current font family, and uses the underscore character from tt fonts or otherwise \textunderscore (which is a character or rule depending on the font encoding). After the underscore, it inserts a discretionary hyphenation point as "\usc@dischyph", which is usually just "\-" except that it still works in the tabbing environment, although it will give "\discretionary{}{}{}" under the [nohyphen] option. After that, another piece of non-breaking interword glue is inserted. Ordinarily, the comparison "\ifx\f@family\ttdefault" will always fail because \ttdefault is `long' where \f@family is not (boooo hisss), but \ttdefault is redefined to be non-long by "\AtBeginDocument". The "\_" command is then defined to use "\BreakableUnderscore". If the [strings] option is not given, then that is all! Under the [strings] option, the list of special commands is processed to: - retain the original command as \US_command (\US_ref) - redefine the command as \US@prot\US_command for ordinary commands (\ref -> \US@prot\US_ref) or as \US@protopt\US_command when an optional argument is possible (\bibitem -> \US@protopt\US_bibitem). - self-protecting commands (\cite) retain their self-protection. Diagnosing the state of the pre-existing command is done by painful contortions involving \meaning. \US@prot and \US@protopt read the argument, process it with \protect enabled, then invoke the saved \US_command. 
Modifications: ~~~~~~~~~~~~~~ 12-Oct-2001 Babel (safe at actives) compatibility and [nohyphen] option. Test file integrity: ASCII 32-57, 58-126: !"#$%&'()*+,-./0123456789 :;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ Index: howto.cls =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/texinputs/howto.cls,v retrieving revision 1.13.2.1 retrieving revision 1.13.2.2 diff -u -d -r1.13.2.1 -r1.13.2.2 --- howto.cls 7 Jan 2005 06:57:36 -0000 1.13.2.1 +++ howto.cls 16 Oct 2005 05:23:58 -0000 1.13.2.2 @@ -50,7 +50,8 @@ % \renewcommand{\maketitle}{ \py at doHorizontalRule - \@ifundefined{pdfinfo}{}{{ + \ifpdf + \begingroup % This \def is required to deal with multi-line authors; it % changes \\ to ', ' (comma-space), making it pass muster for % generating document info in the PDF file. @@ -59,7 +60,8 @@ /Author (\@author) /Title (\@title) } - }} + \endgroup + \fi \begin{flushright} {\rm\Huge\py at HeaderFamily \@title} \par {\em\large\py at HeaderFamily \py at release\releaseinfo} \par Index: manual.cls =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/texinputs/manual.cls,v retrieving revision 1.18.2.1 retrieving revision 1.18.2.2 diff -u -d -r1.18.2.1 -r1.18.2.2 --- manual.cls 7 Jan 2005 06:57:36 -0000 1.18.2.1 +++ manual.cls 16 Oct 2005 05:23:58 -0000 1.18.2.2 @@ -64,7 +64,8 @@ \let\footnotesize\small \let\footnoterule\relax \py at doHorizontalRule% - \@ifundefined{pdfinfo}{}{{ + \ifpdf + \begingroup % This \def is required to deal with multi-line authors; it % changes \\ to ', ' (comma-space), making it pass muster for % generating document info in the PDF file. 
@@ -73,7 +74,8 @@ /Author (\@author) /Title (\@title) } - }} + \endgroup + \fi \begin{flushright}% {\rm\Huge\py at HeaderFamily \@title \par}% {\em\LARGE\py at HeaderFamily \py at release\releaseinfo \par} Index: python.sty =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/texinputs/python.sty,v retrieving revision 1.96.2.2 retrieving revision 1.96.2.3 diff -u -d -r1.96.2.2 -r1.96.2.3 --- python.sty 7 Jan 2005 06:57:36 -0000 1.96.2.2 +++ python.sty 16 Oct 2005 05:23:58 -0000 1.96.2.3 @@ -7,6 +7,7 @@ [1998/01/11 LaTeX package (Python markup)] \RequirePackage{longtable} +\RequirePackage{underscore} % Uncomment these two lines to ignore the paper size and make the page % size more like a typical published manual. @@ -541,28 +542,6 @@ {\reset at font\normalsize\py at HeaderFamily}} -% This gets the underscores closer to the right width; the only change -% from standard LaTeX is the width specified. - -\DeclareTextCommandDefault{\textunderscore}{% - \leavevmode \kern.06em\vbox{\hrule\@width.55em}} - -% Underscore hack (only act like subscript operator if in math mode) -% -% The following is due to Mark Wooding (the old version didn't work with -% Latex 2e. - -\DeclareRobustCommand\hackscore{% - \ifmmode_\else\textunderscore\fi% -} -\begingroup -\catcode`\_\active -\def\next{% - \AtBeginDocument{\catcode`\_\active\def_{\hackscore{}}}% -} -\expandafter\endgroup\next - - % Now for a lot of semantically-loaded environments that do a ton of magical % things to get the right formatting and index entries for the stuff in % Python modules and C API. 
@@ -1202,7 +1181,7 @@ } \fi -% \seelink{url}{link text} +% \seelink{url}{link text}{why it's interesting} \newcommand{\py at seelink}[3]{% \par \begin{fulllineitems} From jhylton at users.sourceforge.net Sun Oct 16 07:24:32 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/ref ref1.tex, 1.14, 1.14.8.1 ref2.tex, 1.37.2.2, 1.37.2.3 ref3.tex, 1.92.2.2, 1.92.2.3 ref4.tex, 1.33.2.2, 1.33.2.3 ref5.tex, 1.64.2.2, 1.64.2.3 ref6.tex, 1.55.2.2, 1.55.2.3 ref7.tex, 1.34.2.2, 1.34.2.3 Message-ID: <20051016052432.812151E400E@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/ref In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/ref Modified Files: Tag: ast-branch ref1.tex ref2.tex ref3.tex ref4.tex ref5.tex ref6.tex ref7.tex Log Message: Merge head to branch (for the last time) Index: ref1.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref1.tex,v retrieving revision 1.14 retrieving revision 1.14.8.1 diff -u -d -r1.14 -r1.14.8.1 --- ref1.tex 28 Nov 2001 07:26:15 -0000 1.14 +++ ref1.tex 16 Oct 2005 05:23:58 -0000 1.14.8.1 @@ -21,7 +21,7 @@ reference document --- the implementation may change, and other implementations of the same language may work differently. On the other hand, there is currently only one Python implementation in -widespread use (although a second one now exists!), and +widespread use (although alternate implementations exist), and its particular quirks are sometimes worth being mentioned, especially where the implementation imposes additional limitations. Therefore, you'll find short ``implementation notes'' sprinkled throughout the @@ -34,6 +34,56 @@ with the language definition. 
+\section{Alternate Implementations\label{implementations}} + +Though there is one Python implementation which is by far the most +popular, there are some alternate implementations which are of +particular interest to different audiences. + +Known implementations include: + +\begin{itemize} +\item[CPython] +This is the original and most-maintained implementation of Python, +written in C. New language features generally appear here first. + +\item[Jython] +Python implemented in Java. This implementation can be used as a +scripting language for Java applications, or can be used to create +applications using the Java class libraries. It is also often used to +create tests for Java libraries. More information can be found at +\ulink{the Jython website}{http://www.jython.org/}. + +\item[Python for .NET] +This implementation actually uses the CPython implementation, but is a +managed .NET application and makes .NET libraries available. This was +created by Brian Lloyd. For more information, see the \ulink{Python +for .NET home page}{http://www.zope.org/Members/Brian/PythonNet}. + +\item[IronPython] +An alternate Python for\ .NET. Unlike Python.NET, this is a complete +Python implementation that generates IL, and compiles Python code +directly to\ .NET assemblies. It was created by Jim Hugunin, the +original creator of Jython. For more information, see \ulink{the +IronPython website}{http://workspaces.gotdotnet.com/ironpython}. + +\item[PyPy] +An implementation of Python written in Python; even the bytecode +interpreter is written in Python. This is executed using CPython as +the underlying interpreter. One of the goals of the project is to +encourage experimentation with the language itself by making it easier +to modify the interpreter (since it is written in Python). Additional +information is available on \ulink{the PyPy project's home +page}{http://codespeak.net/pypy/}. 
+\end{itemize} + +Each of these implementations varies in some way from the language as +documented in this manual, or introduces specific information beyond +what's covered in the standard Python documentation. Please refer to +the implementation-specific documentation to determine what else you +need to know about the specific implementation you're using. + + \section{Notation\label{notation}} The descriptions of lexical analysis and syntax use a modified BNF @@ -43,10 +93,10 @@ \index{syntax} \index{notation} -\begin{verbatim} -name: lc_letter (lc_letter | "_")* -lc_letter: "a"..."z" -\end{verbatim} +\begin{productionlist} + \production{name}{\token{lc_letter} (\token{lc_letter} | "_")*} + \production{lc_letter}{"a"..."z"} +\end{productionlist} The first line says that a \code{name} is an \code{lc_letter} followed by a sequence of zero or more \code{lc_letter}s and underscores. An @@ -55,7 +105,7 @@ names defined in lexical and grammar rules in this document.) Each rule begins with a name (which is the name defined by the rule) -and a colon. A vertical bar (\code{|}) is used to separate +and \code{::=}. A vertical bar (\code{|}) is used to separate alternatives; it is the least binding operator in this notation. A star (\code{*}) means zero or more repetitions of the preceding item; likewise, a plus (\code{+}) means one or more repetitions, and a Index: ref2.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref2.tex,v retrieving revision 1.37.2.2 retrieving revision 1.37.2.3 diff -u -d -r1.37.2.2 -r1.37.2.3 --- ref2.tex 7 Jan 2005 06:57:33 -0000 1.37.2.2 +++ ref2.tex 16 Oct 2005 05:23:58 -0000 1.37.2.3 @@ -54,11 +54,18 @@ \subsection{Physical lines\label{physical}} -A physical line ends in whatever the current platform's convention is -for terminating lines. On \UNIX, this is the \ASCII{} LF (linefeed) -character. On Windows, it is the \ASCII{} sequence CR LF (return -followed by linefeed). 
On Macintosh, it is the \ASCII{} CR (return) -character. +A physical line is a sequence of characters terminated by an end-of-line +sequence. In source files, any of the standard platform line +termination sequences can be used - the \UNIX{} form using \ASCII{} LF +(linefeed), the Windows form using the \ASCII{} sequence CR LF (return +followed by linefeed), or the Macintosh form using the \ASCII{} CR +(return) character. All of these forms can be used equally, regardless +of platform. + +When embedding Python, source code strings should be passed to Python +APIs using the standard C conventions for newline characters (the +\code{\e n} character, representing \ASCII{} LF, is the line +terminator). \subsection{Comments\label{comments}} @@ -342,7 +349,7 @@ \item[\code{__*__}] System-defined names. These names are defined by the interpreter - and it's implementation (including the standard library); + and its implementation (including the standard library); applications should not expect to define additional names using this convention. The set of names of this class defined by Python may be extended in future versions. Index: ref3.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref3.tex,v retrieving revision 1.92.2.2 retrieving revision 1.92.2.3 diff -u -d -r1.92.2.2 -r1.92.2.3 --- ref3.tex 7 Jan 2005 06:57:33 -0000 1.92.2.2 +++ ref3.tex 16 Oct 2005 05:23:58 -0000 1.92.2.3 @@ -165,7 +165,8 @@ \begin{description} \item[Integers] -These represent elements from the mathematical set of whole numbers. +These represent elements from the mathematical set of integers +(positive and negative). \obindex{integer} There are three types of integers: @@ -325,7 +326,7 @@ \function{ord()}\bifuncindex{ord} convert between code units and nonnegative integers representing the Unicode ordinals as defined in the Unicode Standard 3.0. 
Conversion from and to other encodings are -possible through the Unicode method \method{encode} and the built-in +possible through the Unicode method \method{encode()} and the built-in function \function{unicode()}.\bifuncindex{unicode} \obindex{unicode} \index{character} @@ -450,7 +451,7 @@ \lineiii{__module__}{The name of the module the function was defined in, or \code{None} if unavailable.}{Writable} - \lineiii{func_defaults}{Atuple containing default argument values + \lineiii{func_defaults}{A tuple containing default argument values for those arguments that have defaults, or \code{None} if no arguments have a default value}{Writable} @@ -604,7 +605,7 @@ have reached the end of the set of values to be returned. \item[Built-in functions] -A built-in function object is a wrapper around a \C{} function. Examples +A built-in function object is a wrapper around a C function. Examples of built-in functions are \function{len()} and \function{math.sin()} (\module{math} is a standard built-in module). The number and type of the arguments are @@ -917,14 +918,16 @@ \ttindex{f_builtins} \ttindex{f_restricted}} -Special writable attributes: \member{f_trace}, if not \code{None}, is a -function called at the start of each source code line (this is used by -the debugger); \member{f_exc_type}, \member{f_exc_value}, -\member{f_exc_traceback} represent the most recent exception caught in -this frame; \member{f_lineno} is the current line number of the frame ---- writing to this from within a trace function jumps to the given line -(only for the bottom-most frame). A debugger can implement a Jump -command (aka Set Next Statement) by writing to f_lineno. 
+Special writable attributes: \member{f_trace}, if not \code{None}, is +a function called at the start of each source code line (this is used +by the debugger); \member{f_exc_type}, \member{f_exc_value}, +\member{f_exc_traceback} represent the last exception raised in the +parent frame provided another exception was ever raised in the current +frame (in all other cases they are None); \member{f_lineno} is the +current line number of the frame --- writing to this from within a +trace function jumps to the given line (only for the bottom-most +frame). A debugger can implement a Jump command (aka Set Next +Statement) by writing to f_lineno. \withsubitem{(frame attribute)}{ \ttindex{f_trace} \ttindex{f_exc_type} @@ -1052,6 +1055,35 @@ \subsection{Basic customization\label{customization}} +\begin{methoddesc}[object]{__new__}{cls\optional{, \moreargs}} +Called to create a new instance of class \var{cls}. \method{__new__()} +is a static method (special-cased so you need not declare it as such) +that takes the class of which an instance was requested as its first +argument. The remaining arguments are those passed to the object +constructor expression (the call to the class). The return value of +\method{__new__()} should be the new object instance (usually an +instance of \var{cls}). + +Typical implementations create a new instance of the class by invoking +the superclass's \method{__new__()} method using +\samp{super(\var{currentclass}, \var{cls}).__new__(\var{cls}[, ...])} +with appropriate arguments and then modifying the newly-created instance +as necessary before returning it. + +If \method{__new__()} returns an instance of \var{cls}, then the new +instance's \method{__init__()} method will be invoked like +\samp{__init__(\var{self}[, ...])}, where \var{self} is the new instance +and the remaining arguments are the same as were passed to +\method{__new__()}. 
+ +If \method{__new__()} does not return an instance of \var{cls}, then the +new instance's \method{__init__()} method will not be invoked. + +\method{__new__()} is intended mainly to allow subclasses of +immutable types (like int, str, or tuple) to customize instance +creation. +\end{methoddesc} + \begin{methoddesc}[object]{__init__}{self\optional{, \moreargs}} Called\indexii{class}{constructor} when the instance is created. The arguments are those passed to the class constructor expression. If a @@ -1176,8 +1208,8 @@ There are no implied relationships among the comparison operators. The truth of \code{\var{x}==\var{y}} does not imply that \code{\var{x}!=\var{y}} -is false. Accordingly, when defining \method{__eq__}, one should also -define \method{__ne__} so that the operators will behave as expected. +is false. Accordingly, when defining \method{__eq__()}, one should also +define \method{__ne__()} so that the operators will behave as expected. There are no reflected (swapped-argument) versions of these methods (to be used when the left argument does not support the operation but @@ -1306,8 +1338,9 @@ \begin{methoddesc}[object]{__getattribute__}{self, name} Called unconditionally to implement attribute accesses for instances -of the class. If the class also defines \method{__getattr__}, it will -never be called (unless called explicitly). +of the class. If the class also defines \method{__getattr__()}, the latter +will not be called unless \method{__getattribute__()} either calls it +explicitly or raises an \exception{AttributeError}. This method should return the (computed) attribute value or raise an \exception{AttributeError} exception. In order to avoid infinite recursion in this method, its @@ -1626,6 +1659,8 @@ raised; if of a value outside the set of indexes for the sequence (after any special interpretation of negative values), \exception{IndexError} should be raised. 
+For mapping types, if \var{key} is missing (not in the container), +\exception{KeyError} should be raised. \note{\keyword{for} loops expect that an \exception{IndexError} will be raised for illegal indexes to allow proper detection of the end of the sequence.} @@ -1870,7 +1905,7 @@ \var{x}\code{+=}\var{y}, where \var{x} is an instance of a class that has an \method{__iadd__()} method, \code{\var{x}.__iadd__(\var{y})} is called. If \var{x} is an instance of a class that does not define a -\method{__iadd()} method, \code{\var{x}.__add__(\var{y})} and +\method{__iadd__()} method, \code{\var{x}.__add__(\var{y})} and \code{\var{y}.__radd__(\var{x})} are considered, as with the evaluation of \var{x}\code{+}\var{y}. \end{methoddesc} @@ -1953,10 +1988,10 @@ Below, \method{__op__()} and \method{__rop__()} are used to signify the generic method names corresponding to an operator; -\method{__iop__} is used for the corresponding in-place operator. For +\method{__iop__()} is used for the corresponding in-place operator. For example, for the operator `\code{+}', \method{__add__()} and \method{__radd__()} are used for the left and right variant of the -binary operator, and \method{__iadd__} for the in-place variant. +binary operator, and \method{__iadd__()} for the in-place variant. \item Index: ref4.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref4.tex,v retrieving revision 1.33.2.2 retrieving revision 1.33.2.3 diff -u -d -r1.33.2.2 -r1.33.2.3 --- ref4.tex 7 Jan 2005 06:57:34 -0000 1.33.2.2 +++ ref4.tex 16 Oct 2005 05:23:58 -0000 1.33.2.3 @@ -182,16 +182,20 @@ \exception{SystemExit}\withsubitem{(built-in exception)}{\ttindex{SystemExit}}. -Exceptions are identified by class instances. -Selection of a matching except clause is based on object identity. -The \keyword{except} clause must reference the same class or a base -class of it. +Exceptions are identified by class instances. 
The \keyword{except} +clause is selected depending on the class of the instance: it must +reference the class of the instance or a base class thereof. The +instance can be received by the handler and can carry additional +information about the exceptional condition. -When an exception is raised, an object (maybe \code{None}) is passed -as the exception's \emph{value}; this object does not affect the -selection of an exception handler, but is passed to the selected -exception handler as additional information. For class exceptions, -this object must be an instance of the exception class being raised. +Exceptions can also be identified by strings, in which case the +\keyword{except} clause is selected by object identity. An arbitrary +value can be raised along with the identifying string which can be +passed to the handler. + +\deprecated{2.5}{String exceptions should not be used in new code. +They will not be supported in a future version of Python. Old code +should be rewritten to use class exceptions instead.} \begin{notice}[warning] Messages to exceptions are not part of the Python API. Their contents may Index: ref5.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref5.tex,v retrieving revision 1.64.2.2 retrieving revision 1.64.2.3 diff -u -d -r1.64.2.2 -r1.64.2.3 --- ref5.tex 7 Jan 2005 06:57:34 -0000 1.64.2.2 +++ ref5.tex 16 Oct 2005 05:23:58 -0000 1.64.2.3 @@ -960,7 +960,7 @@ object is a member of a set if the set is a sequence and contains an element equal to that object. However, it is possible for an object to support membership tests without being a sequence. In particular, -dictionaries support memership testing as a nicer way of spelling +dictionaries support membership testing as a nicer way of spelling \code{\var{key} in \var{dict}}; other mapping types may follow suit. 
For the list and tuple types, \code{\var{x} in \var{y}} is true if and @@ -1021,9 +1021,9 @@ In the context of Boolean operations, and also when expressions are used by control flow statements, the following values are interpreted -as false: \code{None}, numeric zero of all types, empty sequences -(strings, tuples and lists), and empty mappings (dictionaries). All -other values are interpreted as true. +as false: \code{False}, \code{None}, numeric zero of all types, and empty +strings and containers (including strings, tuples, lists, dictionaries, +sets and frozensets). All other values are interpreted as true. The operator \keyword{not} yields \code{True} if its argument is false, \code{False} otherwise. Index: ref6.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref6.tex,v retrieving revision 1.55.2.2 retrieving revision 1.55.2.3 diff -u -d -r1.55.2.2 -r1.55.2.3 --- ref6.tex 7 Jan 2005 06:57:34 -0000 1.55.2.2 +++ ref6.tex 16 Oct 2005 05:23:58 -0000 1.55.2.3 @@ -204,12 +204,12 @@ \item If the target is a subscription: The primary expression in the reference is evaluated. It should yield either a mutable sequence -object (e.g., a list) or a mapping object (e.g., a dictionary). Next, +object (such as a list) or a mapping object (such as a dictionary). Next, the subscript expression is evaluated. \indexii{subscription}{assignment} \obindex{mutable} -If the primary is a mutable sequence object (e.g., a list), the subscript +If the primary is a mutable sequence object (such as a list), the subscript must yield a plain integer. If it is negative, the sequence's length is added to it. 
The resulting value must be a nonnegative integer less than the sequence's length, and the sequence is asked to assign @@ -219,7 +219,7 @@ \obindex{sequence} \obindex{list} -If the primary is a mapping object (e.g., a dictionary), the subscript must +If the primary is a mapping object (such as a dictionary), the subscript must have a type compatible with the mapping's key type, and the mapping is then asked to create a key/datum pair which maps the subscript to the assigned object. This can either replace an existing key/value @@ -230,7 +230,7 @@ \item If the target is a slicing: The primary expression in the reference is -evaluated. It should yield a mutable sequence object (e.g., a list). The +evaluated. It should yield a mutable sequence object (such as a list). The assigned object should be a sequence object of the same type. Next, the lower and upper bound expressions are evaluated, insofar they are present; defaults are zero and the sequence's length. The bounds @@ -251,7 +251,7 @@ messages.) WARNING: Although the definition of assignment implies that overlaps -between the left-hand side and the right-hand side are `safe' (e.g., +between the left-hand side and the right-hand side are `safe' (for example \samp{a, b = b, a} swaps two variables), overlaps \emph{within} the collection of assigned-to variables are not safe! For instance, the following program prints \samp{[0, 2]}: @@ -523,8 +523,10 @@ \end{productionlist} If no expressions are present, \keyword{raise} re-raises the last -expression that was active in the current scope. If no exception is -active in the current scope, an exception is raised indicating this error. +exception that was active in the current scope. If no exception is +active in the current scope, a \exception{TypeError} exception is +raised indicating that this is an error (if running under IDLE, a +\exception{Queue.Empty} exception is raised instead). 
\index{exception} \indexii{raising}{exception} @@ -601,7 +603,7 @@ \keyword{continue} may only occur syntactically nested in a \keyword{for} or \keyword{while} loop, but not nested in a function or class definition or -\keyword{try} statement within that loop.\footnote{It may +\keyword{finally} statement within that loop.\footnote{It may occur within an \keyword{except} or \keyword{else} clause. The restriction on occurring in the \keyword{try} clause is implementor's laziness and will eventually be lifted.} @@ -872,7 +874,12 @@ a code object. If it is a string, the string is parsed as a suite of Python statements which is then executed (unless a syntax error occurs). If it is an open file, the file is parsed until \EOF{} and -executed. If it is a code object, it is simply executed. +executed. If it is a code object, it is simply executed. In all +cases, the code that's executed is expected to be valid as file +input (see section~\ref{file-input}, ``File input''). Be aware that +the \keyword{return} and \keyword{yield} statements may not be used +outside of function definitions even within the context of code passed +to the \keyword{exec} statement. In all cases, if the optional parts are omitted, the code is executed in the current scope. If only the first expression after \keyword{in} Index: ref7.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref7.tex,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- ref7.tex 7 Jan 2005 06:57:34 -0000 1.34.2.2 +++ ref7.tex 16 Oct 2005 05:23:58 -0000 1.34.2.3 @@ -223,11 +223,11 @@ except clause with an expression, that expression is evaluated, and the clause matches the exception if the resulting object is ``compatible'' with the exception. 
An object is compatible with an exception if it -is either the object that identifies the exception, or (for exceptions -that are classes) it is a base class of the exception, or it is a -tuple containing an item that is compatible with the exception. Note -that the object identities must match, i.e. it must be the same -object, not just an object with the same value. +is the class or a base class of the exception object, a tuple +containing an item compatible with the exception, or, in the +(deprecated) case of string exceptions, is the raised string itself +(note that the object identities must match, i.e. it must be the same +string object, not just a string with the same value). \kwindex{except} If no except clause matches the exception, the search for an exception @@ -239,14 +239,14 @@ on the call stack (it is treated as if the entire \keyword{try} statement raised the exception). -When a matching except clause is found, the exception's parameter is -assigned to the target specified in that except clause, if present, -and the except clause's suite is executed. All except clauses must -have an executable block. When the end of this block -is reached, execution continues normally after the entire try -statement. (This means that if two nested handlers exist for the same -exception, and the exception occurs in the try clause of the inner -handler, the outer handler will not handle the exception.) +When a matching except clause is found, the exception is assigned to +the target specified in that except clause, if present, and the except +clause's suite is executed. All except clauses must have an +executable block. When the end of this block is reached, execution +continues normally after the entire try statement. (This means that +if two nested handlers exist for the same exception, and the exception +occurs in the try clause of the inner handler, the outer handler will +not handle the exception.) 
Before an except clause's suite is executed, details about the exception are assigned to three variables in the @@ -323,6 +323,8 @@ {\token{decorator}+} \production{decorator} {"@" \token{dotted_name} ["(" [\token{argument_list} [","]] ")"] NEWLINE} + \production{dotted_name} + {\token{identifier} ("." \token{identifier})*} \production{parameter_list} {(\token{defparameter} ",")*} \productioncont{(~~"*" \token{identifier} [, "**" \token{identifier}]} @@ -439,7 +441,7 @@ {"class" \token{classname} [\token{inheritance}] ":" \token{suite}} \production{inheritance} - {"(" \token{expression_list} ")"} + {"(" [\token{expression_list}] ")"} \production{classname} {\token{identifier}} \end{productionlist} From jhylton at users.sourceforge.net Sun Oct 16 07:24:32 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/whatsnew whatsnew24.tex, 1.127.2.1, 1.127.2.2 whatsnew25.tex, 1.4.2.1, 1.4.2.2 Message-ID: <20051016052432.9BCE61E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/whatsnew In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/whatsnew Modified Files: Tag: ast-branch whatsnew24.tex whatsnew25.tex Log Message: Merge head to branch (for the last time) Index: whatsnew24.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/whatsnew/whatsnew24.tex,v retrieving revision 1.127.2.1 retrieving revision 1.127.2.2 diff -u -d -r1.127.2.1 -r1.127.2.2 --- whatsnew24.tex 7 Jan 2005 06:57:40 -0000 1.127.2.1 +++ whatsnew24.tex 16 Oct 2005 05:23:58 -0000 1.127.2.2 @@ -18,8 +18,8 @@ \maketitle \tableofcontents -This article explains the new features in Python 2.4, released on -November~30, 2004. +This article explains the new features in Python 2.4.1, released on +March~30, 2005. Python 2.4 is a medium-sized release. 
It doesn't introduce as many changes as the radical Python 2.2, but introduces more features than @@ -379,9 +379,11 @@ by Kevin D. Smith, Jim Jewett, and Skip Montanaro. Several people wrote patches implementing function decorators, but the one that was actually checked in was patch \#979728, written by Mark Russell.} -\end{seealso} -% XXX add link to decorators module in Wiki +\seeurl{http://www.python.org/moin/PythonDecoratorLibrary} +{This Wiki page contains several examples of decorators.} + +\end{seealso} %====================================================================== @@ -1039,7 +1041,7 @@ \item The inner loops for list and tuple slicing were optimized and now run about one-third faster. The inner loops - for dictionaries were also optimized , resulting in performance boosts for + for dictionaries were also optimized, resulting in performance boosts for \method{keys()}, \method{values()}, \method{items()}, \method{iterkeys()}, \method{itervalues()}, and \method{iteritems()}. (Contributed by Raymond Hettinger.) @@ -1426,6 +1428,12 @@ Python 2.4's regular expression engine can match this pattern without problems. +\item The \module{signal} module now performs tighter error-checking +on the parameters to the \function{signal.signal()} function. For +example, you can't set a handler on the \constant{SIGKILL} signal; +previous versions of Python would quietly accept this, but 2.4 will +raise a \exception{RuntimeError} exception. + \item Two new functions were added to the \module{socket} module. \function{socketpair()} returns a pair of connected sockets and \function{getservbyport(\var{port})} looks up the service name for a @@ -1705,6 +1713,11 @@ now return an empty list instead of raising a \exception{TypeError} exception if called with no arguments. +\item You can no longer compare the \class{date} and \class{datetime} + instances provided by the \module{datetime} module. 
Two + instances of different classes will now always be unequal, and + relative comparisons (\code{<}, \code{>}) will raise a \exception{TypeError}. + \item \function{dircache.listdir()} now passes exceptions to the caller instead of returning empty lists. @@ -1724,8 +1737,10 @@ \item \constant{None} is now a constant; code that binds a new value to the name \samp{None} is now a syntax error. -% signal module now raises a RuntimeError on insane calls - e.g. setting a -% handler on SIGKILL +\item The \function{signal.signal()} function now raises a +\exception{RuntimeError} exception for certain illegal values; +previously these errors would pass silently. For example, you can no +longer set a handler on the \constant{SIGKILL} signal. \end{itemize} @@ -1735,7 +1750,8 @@ The author would like to thank the following people for offering suggestions, corrections and assistance with various drafts of this -article: Koray Can, Hye-Shik Chang, Michael Dyck, Raymond Hettinger, -Brian Hurt, Hamish Lawson, Fredrik Lundh, Sean Reifschneider. +article: Koray Can, Hye-Shik Chang, Michael Dyck, Raymond Hettinger, +Brian Hurt, Hamish Lawson, Fredrik Lundh, Sean Reifschneider, +Sadruddin Rejeb. \end{document} Index: whatsnew25.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/whatsnew/whatsnew25.tex,v retrieving revision 1.4.2.1 retrieving revision 1.4.2.2 diff -u -d -r1.4.2.1 -r1.4.2.2 --- whatsnew25.tex 7 Jan 2005 06:57:41 -0000 1.4.2.1 +++ whatsnew25.tex 16 Oct 2005 05:23:58 -0000 1.4.2.2 @@ -1,5 +1,6 @@ \documentclass{howto} \usepackage{distutils} + % $Id$ @@ -26,8 +27,229 @@ %====================================================================== +\section{PEP 309: Partial Function Application} -% Large, PEP-level features and changes should be described here. +The \module{functional} module is intended to contain tools for +functional-style programming.
Currently it only contains +\class{partial}, but new functions will probably be added in future +versions of Python. + +For programs written in a functional style, it can be useful to +construct variants of existing functions that have some of the +parameters filled in. Consider a Python function \code{f(a, b, c)}; +you could create a new function \code{g(b, c)} that was equivalent to +\code{f(1, b, c)}. This is called ``partial function application'', +and is provided by the \class{partial} class in the new +\module{functional} module. + +The constructor for \class{partial} takes the arguments +\code{(\var{function}, \var{arg1}, \var{arg2}, ... +\var{kwarg1}=\var{value1}, \var{kwarg2}=\var{value2})}. The resulting +object is callable, so you can just call it to invoke \var{function} +with the filled-in arguments. + +Here's a small but realistic example: + +\begin{verbatim} +import functional + +def log (message, subsystem): + "Write the contents of 'message' to the specified subsystem." + print '%s: %s' % (subsystem, message) + ... + +server_log = functional.partial(log, subsystem='server') +\end{verbatim} + +Here's another example, from a program that uses PyGTK. Here a +context-sensitive pop-up menu is being constructed dynamically. The +callback provided for the menu option is a partially applied version +of the \method{open_item()} method, where the first argument has been +provided. + +\begin{verbatim} +... +class Application: + def open_item(self, path): + ...
+ def init (self): + open_func = functional.partial(self.open_item, item_path) + popup_menu.append( ("Open", open_func, 1) ) +\end{verbatim} + + +\begin{seealso} + +\seepep{309}{Partial Function Application}{PEP proposed and written by +Peter Harris; implemented by Hye-Shik Chang, with adaptations by +Raymond Hettinger.} + +\end{seealso} + + +%====================================================================== +\section{PEP 314: Metadata for Python Software Packages v1.1} + +Some simple dependency support was added to Distutils. The +\function{setup()} function now has \code{requires}, \code{provides}, +and \code{obsoletes} keyword parameters. When you build a source distribution using the +\code{sdist} command, the dependency information will be recorded in +the \file{PKG-INFO} file. + +Another new keyword is \code{download_url}, which should be set to a +URL for the package's source code. This means it's now possible to +look up an entry in the package index, determine the dependencies for +a package, and download the required packages. + +% XXX put example here + +\begin{seealso} + +\seepep{314}{Metadata for Python Software Packages v1.1}{PEP proposed +and written by A.M. Kuchling, Richard Jones, and Fred Drake; +implemented by Richard Jones and Fred Drake.} + +\end{seealso} + + +%====================================================================== +\section{PEP 342: New Generator Features} + +As introduced in Python 2.3, generators only produce output; once a +generator's code is invoked to create an iterator, there's no way to +pass new parameters into the function when its execution is resumed. +Hackish solutions to this include making the generator's code look at +a global variable and then changing the global variable's value, or +passing in some mutable object that callers then modify. Python +2.5 adds the ability to pass values \emph{into} a generator.
+ +To refresh your memory of basic generators, here's a simple example: + +\begin{verbatim} +def counter (maximum): + i = 0 + while i < maximum: + yield i + i += 1 +\end{verbatim} + +When you call \code{counter(10)}, the result is an iterator that +returns the values from 0 up to 9. On encountering the +\keyword{yield} statement, the iterator returns the provided value and +suspends the function's execution, preserving the local variables. +Execution resumes on the following call to the iterator's +\method{next()} method, picking up after the \keyword{yield}. + +In Python 2.3, \keyword{yield} was a statement; it didn't return any +value. In 2.5, \keyword{yield} is now an expression, returning a +value that can be assigned to a variable or otherwise operated on: + +\begin{verbatim} +val = (yield i) +\end{verbatim} + +I recommend that you always put parentheses around a \keyword{yield} +expression when you're doing something with the returned value, as in +the above example. The parentheses aren't always necessary, but it's +easier to always add them instead of having to remember when they're +needed. The exact rules are that a \keyword{yield}-expression must +always be parenthesized except when it occurs at the top-level +expression on the right-hand side of an assignment, meaning +you can write \code{val = yield i} but must use parentheses when there's an operation, as in \code{val = (yield i) + 12}. + +Values are sent into a generator by calling its +\method{send(\var{value})} method. The generator's code is then +resumed and the \keyword{yield} expression produces \var{value}. +If the regular \method{next()} method is called, the \keyword{yield} +returns \constant{None}. + +Here's the previous example, modified to allow changing the value of +the internal counter.
+ +\begin{verbatim} +def counter (maximum): + i = 0 + while i < maximum: + val = (yield i) + # If value provided, change counter + if val is not None: + i = val + else: + i += 1 +\end{verbatim} + +And here's an example of changing the counter: + +\begin{verbatim} +>>> it = counter(10) +>>> print it.next() +0 +>>> print it.next() +1 +>>> print it.send(8) +8 +>>> print it.next() +9 +>>> print it.next() +Traceback (most recent call last): + File ``t.py'', line 15, in ? + print it.next() +StopIteration +\end{verbatim} + +Because \keyword{yield} will often be returning \constant{None}, +you shouldn't just use its value in expressions unless you're sure +that only the \method{send()} method will be used. + +There are two other new methods on generators in addition to +\method{send()}: + +\begin{itemize} + + \item \method{throw(\var{type}, \var{value}=None, + \var{traceback}=None)} is used to raise an exception inside the + generator; the exception is raised by the \keyword{yield} expression + where the generator's execution is paused. + + \item \method{close()} raises a new \exception{GeneratorExit} + exception inside the generator to terminate the iteration. + On receiving this + exception, the generator's code must either raise + \exception{GeneratorExit} or \exception{StopIteration}; catching the + exception and doing anything else is illegal and will trigger + a \exception{RuntimeError}. \method{close()} will also be called by + Python's garbage collection when the generator is garbage-collected. + + If you need to run cleanup code in case of a \exception{GeneratorExit}, + I suggest using a \code{try: ... finally:} suite instead of + catching \exception{GeneratorExit}. + +\end{itemize} + +The cumulative effect of these changes is to turn generators from +one-way producers of information into both producers and consumers. 
+Generators also become \emph{coroutines}, a more generalized form of +subroutines; subroutines are entered at one point and exited at +another point (the top of the function, and a \keyword{return} +statement), but coroutines can be entered, exited, and resumed at +many different points (the \keyword{yield} statements). + + +\begin{seealso} + +\seepep{342}{Coroutines via Enhanced Generators}{PEP written by +Guido van Rossum and Phillip J. Eby; +implemented by Phillip J. Eby. Includes examples of +some fancier uses of generators as coroutines.} + +\seeurl{http://en.wikipedia.org/wiki/Coroutine}{The Wikipedia entry for +coroutines.} + +\seeurl{http://www.sidhe.org/~dan/blog/archives/000178.html}{An +explanation of coroutines from a Perl point of view, written by Dan +Sugalski.} + +\end{seealso} %====================================================================== @@ -40,7 +262,7 @@ \item The \function{min()} and \function{max()} built-in functions gained a \code{key} keyword argument analogous to the \code{key} -argument for \function{sort()}. This argument supplies a function +argument for \method{sort()}. This argument supplies a function that takes a single argument and is called for every value in the list; \function{min()}/\function{max()} will return the element with the smallest/largest return value from this function. @@ -56,6 +278,25 @@ (Contributed by Steven Bethard and Raymond Hettinger.) +\item Two new built-in functions, \function{any()} and +\function{all()}, evaluate whether an iterator contains any true or +false values. \function{any()} returns \constant{True} if any value +returned by the iterator is true; otherwise it will return +\constant{False}. \function{all()} returns \constant{True} only if +all of the values returned by the iterator evaluate as being true. + +% XXX who added? + + +\item The list of base classes in a class definition can now be empty.
+As an example, this is now legal: + +\begin{verbatim} +class C(): + pass +\end{verbatim} +(Implemented by Brett Cannon.) + \end{itemize} @@ -64,7 +305,12 @@ \begin{itemize} -\item Optimizations should be described here. +\item When they were introduced +in Python 2.4, the built-in \class{set} and \class{frozenset} types +were built on top of Python's dictionary type. +In 2.5 the internal data structure has been customized for implementing sets, +and as a result sets will use a third less memory and are somewhat faster. +(Implemented by Raymond Hettinger.) \end{itemize} @@ -84,14 +330,116 @@ \begin{itemize} -\item Descriptions go here. +% collections.deque now has .remove() + +% the cPickle module no longer accepts the deprecated None option in the +% args tuple returned by __reduce__(). + +% csv module improvements + +% datetime.datetime() now has a strptime class method which can be used to +% create datetime object using a string and format. + +\item A new \module{hashlib} module has been added to replace the +\module{md5} and \module{sha} modules. \module{hashlib} adds support +for additional secure hashes (SHA-224, SHA-256, SHA-384, and SHA-512). +When available, the module uses OpenSSL for fast platform optimized +implementations of algorithms. The old \module{md5} and \module{sha} +modules still exist as wrappers around hashlib to preserve backwards +compatibility. (Contributed by Gregory P. Smith.) + +\item The \function{nsmallest()} and +\function{nlargest()} functions in the \module{heapq} module +now support a \code{key} keyword argument similar to the one +provided by the \function{min()}/\function{max()} functions +and the \method{sort()} methods. 
For example: +Example: + +\begin{verbatim} +>>> import heapq +>>> L = ["short", 'medium', 'longest', 'longer still'] +>>> heapq.nsmallest(2, L) # Return two lowest elements, lexicographically +['longer still', 'longest'] +>>> heapq.nsmallest(2, L, key=len) # Return two shortest elements +['short', 'medium'] +\end{verbatim} + +(Contributed by Raymond Hettinger.) + +\item The \function{itertools.islice()} function now accepts +\code{None} for the start and step arguments. This makes it more +compatible with the attributes of slice objects, so that you can now write +the following: + +\begin{verbatim} +s = slice(5) # Create slice object +itertools.islice(iterable, s.start, s.stop, s.step) +\end{verbatim} + +(Contributed by Raymond Hettinger.) + +\item The \module{operator} module's \function{itemgetter()} +and \function{attrgetter()} functions now support multiple fields. +A call such as \code{operator.attrgetter('a', 'b')} +will return a function +that retrieves the \member{a} and \member{b} attributes. Combining +this new feature with the \method{sort()} method's \code{key} parameter +lets you easily sort lists using multiple fields. + +% XXX who added? + + +\item The \module{os} module underwent a number of changes. The +\member{stat_float_times} variable now defaults to true, meaning that +\function{os.stat()} will now return time values as floats. (This +doesn't necessarily mean that \function{os.stat()} will return times +that are precise to fractions of a second; not all systems support +such precision.) + +Constants named \member{os.SEEK_SET}, \member{os.SEEK_CUR}, and +\member{os.SEEK_END} have been added; these are the parameters to the +\function{os.lseek()} function. Two new constants for locking are +\member{os.O_SHLOCK} and \member{os.O_EXLOCK}. + +On FreeBSD, the \function{os.stat()} function now returns +times with nanosecond resolution, and the returned object +now has \member{st_gen} and \member{st_birthtime}. 
+The \member{st_flags} member is also available, if the platform supports it. +% XXX patch 1180695, 1212117 + +\item New module: \module{spwd} provides functions for accessing the +shadow password database on systems that support it. +% XXX give example + +\item The \class{TarFile} class in the \module{tarfile} module now has +an \method{extractall()} method that extracts all members from the +archive into the current working directory. It's also possible to set +a different directory as the extraction target, and to unpack only a +subset of the archive's members. + +A tarfile's compression can be autodetected by +using the mode \code{'r|*'}. +% patch 918101 +(Contributed by Lars Gust\"abel.) + +\item The \module{xmlrpclib} module now supports returning + \class{datetime} objects for the XML-RPC date type. Supply + \code{use_datetime=True} to the \function{loads()} function + or the \class{Unmarshaller} class to enable this feature. +% XXX patch 1120353 + \end{itemize} + %====================================================================== % whole new modules get described in \subsections here +% XXX new distutils features: upload + + + % ====================================================================== \section{Build and C API Changes} @@ -100,8 +448,15 @@ \begin{itemize} -\item The \cfunction{PyRange_New()} function was removed. It was never documented, -never used in the core code, and had dangerously lax error checking. +\item The built-in set types now have an official C API. Call +\cfunction{PySet_New()} and \cfunction{PyFrozenSet_New()} to create a +new set, \cfunction{PySet_Add()} and \cfunction{PySet_Discard()} to +add and remove elements, and \cfunction{PySet_Contains} and +\cfunction{PySet_Size} to examine the set's state. + +\item The \cfunction{PyRange_New()} function was removed. It was +never documented, never used in the core code, and had dangerously lax +error checking. 
\end{itemize} @@ -137,7 +492,24 @@ \begin{itemize} -\item Everything is all in the details! +\item Some old deprecated modules (\module{statcache}, \module{tzparse}, + \module{whrandom}) have been moved to \file{Lib/lib-old}. +You can get access to these modules again by adding the directory +to your \code{sys.path}: + +\begin{verbatim} +import os +from distutils import sysconfig + +lib_dir = sysconfig.get_python_lib(standard_lib=True) +old_dir = os.path.join(lib_dir, 'lib-old') +sys.path.append(old_dir) +\end{verbatim} + +Doing so is discouraged, however; it's better to update any code that +still uses these modules. + +% the pickle module no longer uses the deprecated bin parameter. \end{itemize} From jhylton at users.sourceforge.net Sun Oct 16 07:24:32 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/mac libframework.tex, 1.9.26.1, 1.9.26.2 libmac.tex, 1.22.2.1, 1.22.2.2 libmacic.tex, 1.16.20.1, 1.16.20.2 libmacos.tex, 1.17.18.2, 1.17.18.3 scripting.tex, 1.1.4.2, 1.1.4.3 undoc.tex, 1.8.2.2, 1.8.2.3 using.tex, 1.4.10.2, 1.4.10.3 Message-ID: <20051016052432.AB3091E4014@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/mac In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/mac Modified Files: Tag: ast-branch libframework.tex libmac.tex libmacic.tex libmacos.tex scripting.tex undoc.tex using.tex Log Message: Merge head to branch (for the last time) Index: libframework.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/mac/libframework.tex,v retrieving revision 1.9.26.1 retrieving revision 1.9.26.2 diff -u -d -r1.9.26.1 -r1.9.26.2 --- libframework.tex 7 Jan 2005 06:57:30 -0000 1.9.26.1 +++ libframework.tex 16 Oct 2005 05:23:58 -0000 1.9.26.2 @@ -15,7 +15,8 @@ non-standard way it is not necessary to override the complete event handling. 
-The \module{FrameWork} is still very much work-in-progress, and the +Work on the \module{FrameWork} has pretty much stopped, now that +\module{PyObjC} is available for full Cocoa access from Python, and the documentation describes only the most important functionality, and not in the most logical manner at that. Examine the source or the examples for more details. The following are some comments posted on the @@ -28,8 +29,8 @@ instance, uses a different way to enable/disable menus and that plugs right in leaving the rest intact. The weak points of \module{FrameWork} are that it has no abstract command interface (but -that shouldn't be difficult), that it's dialog support is minimal and -that it's control/toolbar support is non-existent. +that shouldn't be difficult), that its dialog support is minimal and +that its control/toolbar support is non-existent. \end{quotation} Index: libmac.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/mac/libmac.tex,v retrieving revision 1.22.2.1 retrieving revision 1.22.2.2 diff -u -d -r1.22.2.1 -r1.22.2.2 --- libmac.tex 28 Apr 2003 17:33:47 -0000 1.22.2.1 +++ libmac.tex 16 Oct 2005 05:23:58 -0000 1.22.2.2 @@ -1,36 +1,3 @@ -\section{\module{mac} --- - Implementations for the \module{os} module} - -\declaremodule{builtin}{mac} - \platform{Mac} -\modulesynopsis{Implementations for the \module{os} module.} - - -This module implements the Mac OS 9 operating system dependent functionality -provided by the standard module \module{os}\refstmodindex{os}. It is -best accessed through the \module{os} module. This module is only available in -MacPython-OS9, on MacPython-OSX \module{posix} is used. 
- -The following functions are available in this module: -\function{chdir()}, -\function{close()}, -\function{dup()}, -\function{fdopen()}, -\function{getcwd()}, -\function{lseek()}, -\function{listdir()}, -\function{mkdir()}, -\function{open()}, -\function{read()}, -\function{rename()}, -\function{rmdir()}, -\function{stat()}, -\function{sync()}, -\function{unlink()}, -\function{write()}, -as well as the exception \exception{error}. Note that the times -returned by \function{stat()} are floating-point values, like all time -values in MacPython-OS9. \section{\module{macpath} --- MacOS path manipulation functions} @@ -41,9 +8,10 @@ \modulesynopsis{MacOS path manipulation functions.} -This module is the Macintosh implementation of the \module{os.path} -module. It is most portably accessed as -\module{os.path}\refstmodindex{os.path}. Refer to the +This module is the Mac OS 9 (and earlier) implementation of the \module{os.path} +module. It can be used to manipulate old-style Macintosh pathnames on Mac OS +X (or any other platform). +Refer to the \citetitle[../lib/lib.html]{Python Library Reference} for documentation of \module{os.path}. Index: libmacic.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/mac/libmacic.tex,v retrieving revision 1.16.20.1 retrieving revision 1.16.20.2 diff -u -d -r1.16.20.1 -r1.16.20.2 --- libmacic.tex 7 Jan 2005 06:57:31 -0000 1.16.20.1 +++ libmacic.tex 16 Oct 2005 05:23:58 -0000 1.16.20.2 @@ -6,13 +6,8 @@ \modulesynopsis{Access to Internet Config.} -This module provides access to Macintosh Internet -Config\index{Internet Config} package, -which stores preferences for Internet programs such as mail address, -default homepage, etc. Also, Internet Config contains an elaborate set -of mappings from Macintosh creator/type codes to foreign filename -extensions plus information on how to transfer files (binary, ascii, -etc.). 
Since MacOS 9, this module is a control panel named Internet. +This module provides access to various internet-related preferences +set through \program{System Preferences} or the \program{Finder}. There is a low-level companion module \module{icglue}\refbimodindex{icglue} which provides the basic @@ -92,7 +87,7 @@ \begin{methoddesc}{mapfile}{file} Return the mapping entry for the given \var{file}, which can be passed -as either a filename or an \function{macfs.FSSpec()} result, and which +as either a filename or an \function{FSSpec()} result, and which need not exist. The mapping entry is returned as a tuple \code{(\var{version}, @@ -122,7 +117,7 @@ \begin{methoddesc}{settypecreator}{file} Given an existing \var{file}, specified either as a filename or as an -\function{macfs.FSSpec()} result, set its creator and type correctly based +\function{FSSpec()} result, set its creator and type correctly based on its extension. The finder is told about the change, so the finder icon will be updated quickly. \end{methoddesc} Index: libmacos.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/mac/libmacos.tex,v retrieving revision 1.17.18.2 retrieving revision 1.17.18.3 diff -u -d -r1.17.18.2 -r1.17.18.3 --- libmacos.tex 7 Jan 2005 06:57:31 -0000 1.17.18.2 +++ libmacos.tex 16 Oct 2005 05:23:58 -0000 1.17.18.3 @@ -14,11 +14,10 @@ artifact. \begin{datadesc}{runtimemodel} -Either\code{'carbon'} or \code{'macho'}. This -signifies whether this Python uses the Mac OS X and Mac OS 9 compatible -CarbonLib style or the Mac OS -X-only Mach-O style. In earlier versions of Python the value could -also be \code{'ppc'} for the classic Mac OS 8 runtime model. +Always \code{'macho'}, from Python 2.4 on. +In earlier versions of Python the value could +also be \code{'ppc'} for the classic Mac OS 8 runtime model or +\code{'carbon'} for the Mac OS 9 runtime model. 
\end{datadesc} \begin{datadesc}{linkmodel} @@ -26,8 +25,9 @@ incompatible between linking models, packages could use this information to give more decent error messages. The value is one of \code{'static'} for a statically linked Python, \code{'framework'} for Python in a Mac OS X framework, -\code{'shared'} for Python in a standard unix shared library and -\code{'cfm'} for the Mac OS 9-compatible Python. +\code{'shared'} for Python in a standard unix shared library. +Older Pythons could also have the value +\code{'cfm'} for Mac OS 9-compatible Python. \end{datadesc} \begin{excdesc}{Error} @@ -39,84 +39,16 @@ module \refmodule{macerrors}.\refstmodindex{macerrors} \end{excdesc} -\begin{funcdesc}{SetEventHandler}{handler} -In the inner interpreter loop Python will occasionally check for events, -unless disabled with \function{ScheduleParams()}. With this function you -can pass a Python event-handler function that will be called if an event -is available. The event is passed as parameter and the function should return -non-zero if the event has been fully processed, otherwise event processing -continues (by passing the event to the console window package, for instance). - -Call \function{SetEventHandler()} without a parameter to clear the -event handler. Setting an event handler while one is already set is an -error. - -Availability: MacPython-OS9. -\end{funcdesc} - -\begin{funcdesc}{SchedParams}{\optional{doint\optional{, evtmask\optional{, - besocial\optional{, interval\optional{, - bgyield}}}}}} -Influence the interpreter inner loop event handling. \var{Interval} -specifies how often (in seconds, floating point) the interpreter -should enter the event processing code. When true, \var{doint} causes -interrupt (command-dot) checking to be done. \var{evtmask} tells the -interpreter to do event processing for events in the mask (redraws, -mouseclicks to switch to other applications, etc). The \var{besocial} -flag gives other processes a chance to run. 
They are granted minimal -runtime when Python is in the foreground and \var{bgyield} seconds per -\var{interval} when Python runs in the background. - -All parameters are optional, and default to the current value. The return -value of this function is a tuple with the old values of these options. -Initial defaults are that all processing is enabled, checking is done every -quarter second and the processor is given up for a quarter second when in the -background. - -The most common use case is to call \code{SchedParams(0, 0)} to completely disable -event handling in the interpreter mainloop. - -Availability: MacPython-OS9. -\end{funcdesc} - -\begin{funcdesc}{HandleEvent}{ev} -Pass the event record \var{ev} back to the Python event loop, or -possibly to the handler for the \code{sys.stdout} window (based on the -compiler used to build Python). This allows Python programs that do -their own event handling to still have some command-period and -window-switching capability. - -If you attempt to call this function from an event handler set through -\function{SetEventHandler()} you will get an exception. - -Availability: MacPython-OS9. -\end{funcdesc} \begin{funcdesc}{GetErrorString}{errno} Return the textual description of MacOS error code \var{errno}. \end{funcdesc} -\begin{funcdesc}{splash}{resid} -This function will put a splash window -on-screen, with the contents of the DLOG resource specified by -\var{resid}. Calling with a zero argument will remove the splash -screen. This function is useful if you want an applet to post a splash screen -early in initialization without first having to load numerous -extension modules. - -Availability: MacPython-OS9. -\end{funcdesc} - \begin{funcdesc}{DebugStr}{message \optional{, object}} -On Mac OS 9, drop to the low-level debugger with message \var{message}. The -optional \var{object} argument is not used, but can easily be -inspected from the debugger. On Mac OS X the string is simply printed -to stderr. 
- -Note that you should use this function with extreme care: if no -low-level debugger like MacsBug is installed this call will crash your -system. It is intended mainly for developers of Python extension -modules. +On Mac OS X the string is simply printed to stderr (on older +Mac OS systems more elaborate functionality was available), +but it provides a convenient location to attach a breakpoint +in a low-level debugger like \program{gdb}. \end{funcdesc} \begin{funcdesc}{SysBeep}{} @@ -155,6 +87,4 @@ bundle. A script runs from an application bundle either when it has been started with \program{pythonw} instead of \program{python} or when running as an applet. - -On Mac OS 9 the method always returns \code{True}. \end{funcdesc} Index: scripting.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/mac/scripting.tex,v retrieving revision 1.1.4.2 retrieving revision 1.1.4.3 diff -u -d -r1.1.4.2 -r1.1.4.3 --- scripting.tex 7 Jan 2005 06:57:31 -0000 1.1.4.2 +++ scripting.tex 16 Oct 2005 05:23:58 -0000 1.1.4.3 @@ -1,9 +1,10 @@ \chapter{MacPython OSA Modules \label{scripting}} -Python has a fairly complete implementation of the Open Scripting -Architecure (OSA, also commonly referred to as AppleScript), allowing +This chapter describes the current implementation of the Open Scripting +Architecure (OSA, also commonly referred to as AppleScript) for Python, allowing you to control scriptable applications from your Python program, -and with a fairly pythonic interface. +and with a fairly pythonic interface. Development on this set of modules +has stopped, and a replacement is expected for Python 2.5. 
For a description of the various components of AppleScript and OSA, and to get an understanding of the architecture and terminology, you should Index: undoc.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/mac/undoc.tex,v retrieving revision 1.8.2.2 retrieving revision 1.8.2.3 diff -u -d -r1.8.2.2 -r1.8.2.3 --- undoc.tex 7 Jan 2005 06:57:31 -0000 1.8.2.2 +++ undoc.tex 16 Oct 2005 05:23:58 -0000 1.8.2.3 @@ -21,17 +21,7 @@ \modulesynopsis{Helper module for BuildApplet, BuildApplication and macfreeze.} - -\section{\module{py_resource} --- Resources from Python code} -\declaremodule[pyresource]{standard}{py_resource} - \platform{Mac} -\modulesynopsis{Helper to create \texttt{'PYC~'} resources for compiled - applications.} - -This module is primarily used as a help module for -\program{BuildApplet} and \program{BuildApplication}. It is able to -store compiled Python code as \texttt{'PYC~'} resources in a file. - +\deprecated{2.4} \section{\module{cfmfile} --- Code Fragment Resource module} \declaremodule{standard}{cfmfile} @@ -43,6 +33,7 @@ used by BuildApplication to combine all plugin modules to a single executable. +\deprecated{2.4} \section{\module{icopen} --- Internet Config replacement for \method{open()}} \declaremodule{standard}{icopen} @@ -79,30 +70,6 @@ A low-level interface to Navigation Services. -\section{\module{mkcwproject} --- Create CodeWarrior projects} -\declaremodule{standard}{mkcwproject} - \platform{Mac} -\modulesynopsis{Create CodeWarrior projects.} - -\refmodindex{distutils} -\module{mkcwproject} creates project files for the Metrowerks CodeWarrior -development environment. It is a helper module for -\module{distutils} but can be used separately for more -control. 
- - -\section{\module{nsremote} --- Wrapper around Netscape OSA modules} -\declaremodule{standard}{nsremote} - \platform{Mac} -\modulesynopsis{Wrapper around Netscape OSA modules.} - -\module{nsremote} is a wrapper around the Netscape OSA modules that -allows you to easily send your browser to a given URL. A related -module that may be of interest is the \module{webbrowser} module, -documented in the \citetitle[../lib/lib.html]{Python Library -Reference}. - - \section{\module{PixMapWrapper} --- Wrapper for PixMap objects} \declaremodule{standard}{PixMapWrapper} \platform{Mac} @@ -112,43 +79,6 @@ allows access to the fields by name. It also has methods to convert to and from \module{PIL} images. - -\section{\module{preferences} --- Application preferences manager} -\declaremodule{standard}{preferences} - \platform{Mac} -\modulesynopsis{Nice application preferences manager with support for - defaults.} - -The \module{preferences} module allows storage of user preferences in -the system-wide preferences folder, with defaults coming from the -application itself and the possibility to override preferences for -specific situations. - - -\section{\module{pythonprefs} --- Preferences manager for Python} -\declaremodule{standard}{pythonprefs} - \platform{Mac} -\modulesynopsis{Specialized preferences manager for the Python - interpreter.} - -This module is a specialization of the \refmodule{preferences} module -that allows reading and writing of the preferences for the Python -interpreter. - - -\section{\module{quietconsole} --- Non-visible standard output} -\declaremodule{standard}{quietconsole} - \platform{Mac} -\modulesynopsis{Buffered, non-visible standard output.} - -\module{quietconsole} allows you to keep stdio output in a buffer -without displaying it (or without displaying the stdout window -altogether, if set with \program{EditPythonPrefs}) until you try to read from -stdin or disable the buffering, at which point all the saved output is -sent to the window. 
Good for programs with graphical user interfaces -that do want to display their output at a crash. - - \section{\module{videoreader} --- Read QuickTime movies} \declaremodule{standard}{videoreader} \platform{Mac} Index: using.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/mac/using.tex,v retrieving revision 1.4.10.2 retrieving revision 1.4.10.3 diff -u -d -r1.4.10.2 -r1.4.10.3 --- using.tex 7 Jan 2005 06:57:31 -0000 1.4.10.2 +++ using.tex 16 Oct 2005 05:23:58 -0000 1.4.10.3 @@ -1,27 +1,25 @@ -\chapter{Using Python on a Mac OS 9 Macintosh \label{using}} +\chapter{Using Python on a Macintosh \label{using}} \sectionauthor{Bob Savage}{bobsavage at mac.com} -Using Python on a Macintosh, especially on Mac OS 9 (MacPython-OSX -includes a complete \UNIX{} Python) can seem like something completely -different than using it on a \UNIX-like or Windows system. Most of the -Python documentation, both the ``official'' documentation and published -books, describe only how Python is used on these systems, causing -confusion for the new user of MacPython-OS9. This chapter gives a brief -introduction to the specifics of using Python on a Macintosh. - - -The section on the IDE (see Section \ref{IDE}) is relevant to MacPython-OSX -too. +Python on a Macintosh running Mac OS X is in principle very similar to +Python on any other \UNIX platform, but there are a number of additional +features such as the IDE and the Package Manager that are worth pointing out. -\section{Getting and Installing MacPython-OSX \label{getting-OSX}} +Python on Mac OS 9 or earlier can be quite different from Python on +Unix or Windows, but is beyond the scope of this manual, as that platform +is no longer supported, starting with Python 2.4. See +\url{http://www.cwi.nl/\textasciitilde jack/macpython} for installers +for the latest 2.3 release for Mac OS 9 and related documentation. 
-As of Python 2.3a2 the only sure way of getting MacPython-OSX on your machine -is getting a source distribution and building what is called a "framework Python". -The details are in the file \file{Mac/OSX/README}. +\section{Getting and Installing MacPython \label{getting-OSX}} -As binary installers become available the details will be posted to -\url{http://www.cwi.nl/\textasciitilde jack/macpython.html}. +Mac OS X 10.3 comes with Python 2.3 pre-installed by Apple. +This installation does not come with the IDE and other additions, however, +so to get these you need to install the \program{MacPython for Panther additions} +from the MacPython website, \url{http://www.cwi.nl/\textasciitilde jack/macpython}. +For MacPython 2.4, or for any MacPython on earlier releases of Mac OS X, +you need to install a full distribution from the same website. What you get after installing is a number of things: @@ -41,6 +39,14 @@ To uninstall MacPython you can simply remove these three things. +If you use the ``additions'' installer to install on top of an existing +Apple-Python you will not get the framework and the commandline interpreter, +as they have been installed by Apple already, in +\file{/System/Library/Frameworks/Python.framework} and +\file{/usr/bin/python}, respectively. You should in principle never modify +or delete these, as they are Apple-controlled and may be used by Apple- or +third-party software. + PythonIDE contains an Apple Help Viewer book called "MacPython Help" which you can access through its help menu. If you are completely new to Python you should start reading the IDE introduction in that document. @@ -57,18 +63,15 @@ If you want to run Python scripts from the Terminal window command line or from the Finder you first need an editor to create your script. Mac OS X comes with a number of standard \UNIX{} command line editors, -\program{vi} and \program{emacs} among them. If you want a more Mac-like +\program{vim} and \program{emacs} among them. 
If you want a more Mac-like editor \program{BBEdit} or \program{TextWrangler} from Bare Bones Software (see \url{http://www.barebones.com/products/bbedit/index.shtml}) are -good choices. Their freeware \program{BBEdit Lite} is officially -discontinued but still available. \program{AppleWorks} or any other +good choices. \program{AppleWorks} or any other word processor that can save files in ASCII is also a possibility, including \program{TextEdit} which is included with OS X. To run your script from the Terminal window you must make sure that -\file{/usr/local/bin} is in your shell search path before \file{/usr/bin}, -where the Apple-supplied Python lives (which is version 2.2, as of Mac OS X -10.2.4). +\file{/usr/local/bin} is in your shell search path. To run your script from the Finder you have two options: \begin{itemize} @@ -101,263 +104,6 @@ Installing additional Python packages is most easily done through the Package Manager, see the MacPython Help Book for details. -\section{Getting and Installing MacPython-OS9 \label{getting}} - -The most recent release version as well as possible newer experimental -versions are best found at the MacPython page maintained by Jack -Jansen: \url{http://homepages.cwi.nl/\textasciitilde jack/macpython.html}. - -Please refer to the \file{README} included with your distribution for -the most up-to-date instructions. - -Note that MacPython-OS9 runs fine on Mac OS X, and it runs in native -mode, not in the Classic environment. Unless you have specific -requirements for a CFM-based Python there is no reason not to -use MacPython-OSX, though. - - -\subsection{Entering the interactive Interpreter - \label{interpreter}} - -The interactive interpreter that you will see used in Python -documentation is started by double-clicking the -\program{PythonInterpreter} icon, which looks like a 16-ton weight -falling. You should see the version information and the -\samp{>\code{>}>~} prompt. 
Use it exactly as described in the -standard documentation. - - -\subsection{How to run a Python script} - -There are several ways to run an existing Python script; two common -ways to run a Python script are ``drag and drop'' and ``double -clicking''. Other ways include running it from within the IDE (see -Section \ref{IDE}), or launching via AppleScript. - - -\subsubsection{Drag and drop} - -One of the easiest ways to launch a Python script is via ``Drag and -Drop''. This is just like launching a text file in the Finder by -``dragging'' it over your word processor's icon and ``dropping'' it -there. Make sure that you use an icon referring to the -\program{PythonInterpreter}, not the \program{IDE} or \program{Idle} -icons which have different behaviour which is described below. - -Some things that might have gone wrong: - -\begin{itemize} -\item -A window flashes after dropping the script onto the -\program{PythonInterpreter}, but then disappears. Most likely this is a -configuration issue; your \program{PythonInterpreter} is setup to exit -immediately upon completion, but your script assumes that if it prints -something that text will stick around for a while. To fix this, see -section \ref{defaults}. - -\item -When you waved the script icon over the \program{PythonInterpreter}, -the \program{PythonInterpreter} icon did not highlight. Most likely -the Creator code and document type is unset (or set incorrectly) -- -this often happens when a file originates on a non-Mac computer. See -section \ref{creator-code} for more details. -\end{itemize} - - -\subsubsection{Set Creator and Double Click \label{creator-code}} - -If the script that you want to launch has the appropriate Creator Code -and File Type you can simply double-click on the script to launch it. -To be ``double-clickable'' a file needs to be of type \samp{TEXT}, -with a creator code of \samp{Pyth}. 
- -Setting the creator code and filetype can be done with the IDE (see -sections \ref{IDEwrite} and \ref{IDEapplet}), with an editor with a -Python mode (\program{BBEdit}) -- see section -\ref{scripting-with-BBedit}, or with assorted other Mac utilities, but -a script (\file{fixfiletypes.py}) has been included in the MacPython -distribution, making it possible to set the proper Type and Creator -Codes with Python. - -The \file{fixfiletypes.py} script will change the file type and -creator codes for the indicated directory. To use -\file{fixfiletypes.py}: - -\begin{enumerate} -\item -Locate it in the \file{scripts} folder of the \file{Mac} folder of the -MacPython distribution. - -\item -Put all of the scripts that you want to fix in a folder with nothing -else in it. - -\item -Double-click on the \file{fixfiletypes.py} icon. - -\item -Navigate into the folder of files you want to fix, and press the -``Select current folder'' button. -\end{enumerate} - - -\subsection{Simulating command line arguments - \label{argv}} - -There are two ways to simulate command-line arguments with MacPython-OS9. - -\begin{enumerate} -\item via Interpreter options -\begin{itemize} % nestable? I hope so! - \item Hold the option-key down when launching your script. This will - bring up a dialog box of Python Interpreter options. - \item Click ``Set \UNIX-style command line..'' button. - \item Type the arguments into the ``Argument'' field. - \item Click ``OK'' - \item Click ``Run''. -\end{itemize} % end - -\item via drag and drop -If you save the script as an applet (see Section \ref{IDEapplet}), you -can also simulate some command-line arguments via -``Drag-and-Drop''. In this case, the names of the files that were -dropped onto the applet will be appended to \code{sys.argv}, so that -it will appear to the script as though they had been typed on a -command line. 
As on \UNIX\ systems, the first item in \code{sys.srgv} is -the path to the applet, and the rest are the files dropped on the -applet. -\end{enumerate} - - -\subsection{Creating a Python script} - -Since Python scripts are simply text files, they can be created in any -way that text files can be created, but some special tools also exist -with extra features. - - -\subsubsection{In an editor} - -You can create a text file with any word processing program such as -\program{MSWord} or \program{AppleWorks} but you need to make sure -that the file is saved as ``\ASCII'' or ``plain text''. This also -works for \program{TextEdit}, but you need to use the command ``Make Plain Text`` -in the ``Format`` menu before trying to save. - - -\subsubsection{Editors with Python modes} - -Several text editors have additional features that add functionality -when you are creating a Python script. These can include coloring -Python keywords to make your code easier to read, module browsing, or -a built-in debugger. These include \program{Alpha}, \program{Pepper}, -and \program{BBedit}, and the MacPython IDE (Section \ref{IDE}). - -%\subsubsection{Alpha} -% **NEED INFO HERE** - -\subsubsection{BBedit \label{scripting-with-BBedit}} - -If you use \program{BBEdit} to create your scripts you will want to tell it about the Python creator code so that -you can simply double click on the saved file to launch it. -\begin{itemize} - \item Launch \program{BBEdit}. - \item Select ``Preferences'' from the ``Edit'' menu. - \item Select ``File Types'' from the scrolling list. - \item click on the ``Add...'' button and navigate to - \program{PythonInterpreter} in the main directory of the - MacPython distribution; click ``open''. - \item Click on the ``Save'' button in the Preferences panel. -\end{itemize} -% Are there additional BBedit Python-specific features? I'm not aware of any. 
- -%\subsubsection{IDE} -%You can use the \program{Python IDE} supplied in the MacPython Distribution to create longer Python scripts -%-- see Section \ref{IDEwrite} for details. - -%\subsubsection{IDLE} -%Idle is an IDE for Python that was written in Python, using TKInter. You should be able to use it on a Mac by following -%the standard documentation, but see Section \ref{TKInter} for guidance on using TKInter with MacPython. - -%\subsubsection{Pepper} -% **NEED INFO HERE** - -\subsection{Configuration \label{configuration}} - -The MacPython distribution comes with \program{EditPythonPrefs}, an -applet which will help you to customize the MacPython environment for -your working habits. - -\subsubsection{EditPythonPrefs\label{EditPythonPrefs}} - -\program{EditPythonPrefs} gives you the capability to configure Python -to behave the way you want it to. There are two ways to use -\program{EditPythonPrefs}, you can use it to set the preferences in -general, or you can drop a particular Python engine onto it to -customize only that version. The latter can be handy if, for example, -you want to have a second copy of the \program{PythonInterpreter} that -keeps the output window open on a normal exit even though you prefer -to normally not work that way. - -To change the default preferences, simply double-click on -\program{EditPythonPrefs}. To change the preferences only for one copy -of the Interpreter, drop the icon for that copy onto -\program{EditPythonPrefs}. You can also use \program{EditPythonPrefs} -in this fashion to set the preferences of the \program{Python IDE} and -any applets you create -- see section %s \ref{BuildApplet} and -\ref{IDEapplet}. 
- -\subsubsection{Adding modules to the Module Search Path - \label{search-path}} - -When executing an \keyword{import} statement, Python looks for modules -in places defined by the \member{sys.path} To edit the -\member{sys.path} on a Mac, launch \program{EditPythonPrefs}, and -enter them into the largish field at the top (one per line). - -Since MacPython defines a main Python directory, the easiest thing is -to add folders to search within the main Python directory. To add a -folder of scripts that you created called ``My Folder'' located in the -main Python Folder, enter \samp{\$(PYTHON):My Folder} onto a new line. - -To add the Desktop under OS 9 or below, add -\samp{StartupDriveName:Desktop Folder} on a new line. - -\subsubsection{Default startup options \label{defaults}} - -% I'm assuming that there exists some other documentation on the -% rest of the options so I only go over a couple here. - -The ``Default startup options...'' button in the -\program{EditPythonPrefs} dialog box gives you many options including -the ability to keep the ``Output'' window open after the script -terminates, and the ability to enter interactive mode after the -termination of the run script. The latter can be very helpful if you -want to examine the objects that were created during your script. - -%\section{Nifty Tools} -%There are many other tools included with the MacPython -%distribution. In addition to those discussed here, make -%sure to check the \file{Mac} directory. - -%\subsection{BuildApplet \label{BuildApplet}} -% **NEED INFO HERE** - -%\subsection{BuildApplication} -% **NEED INFO HERE** - -%\section{TKInter on the Mac \label{TKInter}} - -%TKinter is installed by default with the MacPython distribution, but -%you may need to add the \file{lib-tk} folder to the Python Path (see -%section \ref{search-path}). 
Also, it is important that you do not -%try to launch Tk from within the \program{Python IDE} because the two -%event loops will collide -- always run a script which uses Tkinter -%with the \program{PythonInterpreter} instead -- see section -%\ref{interpreter}. - -%\section{CGI on the Mac with Python \label{CGI}} -%**NEED INFO HERE** \section{The IDE\label{IDE}} @@ -365,15 +111,13 @@ separate application that acts as a text editor for your Python code, a class browser, a graphical debugger, and more. +The online Python Help contains a quick walkthrough of the IDE that +shows the major features and how to use them. \subsection{Using the ``Python Interactive'' window} -Use this window like you would the \program{PythonInterpreter}, except -that you cannot use the ``Drag and drop'' method above. Instead, -dropping a script onto the \program{Python IDE} icon will open the -file in a separate script window (which you can then execute manually --- see section \ref{IDEexecution}). - +Use this window like you would use a normal \UNIX{} command line +interpreter. \subsection{Writing a Python Script \label{IDEwrite}} @@ -386,11 +130,6 @@ ``File'' menu. Dropping a Python script onto the \program{Python IDE} will open it for editing. -If you try to open a script with the \program{Python IDE} but either -can't locate it from the ``Open'' dialog box, or you get an error -message like ``Can't open file of type ...'' see section -\ref{creator-code}. - When the \program{Python IDE} saves a script, it uses the creator code settings which are available by clicking on the small black triangle on the top right of the document window, and selecting ``save @@ -398,8 +137,8 @@ IDE} as the creator, this means that you can open the file for editing by simply double-clicking on its icon. You might want to change this behaviour so that it will be opened by the -\program{PythonInterpreter}, and run. To do this simply choose -``Python Interpreter'' from the ``save options''. 
Note that these +\program{PythonLauncher}, and run. To do this simply choose +``PythonLauncher'' from the ``save options''. Note that these options are associated with the \emph{file} not the application. @@ -449,4 +188,31 @@ %\subsection{The ``Scripts'' menu} % **NEED INFO HERE** - + +\section{The Package Manager} + +Historically MacPython came with a number of useful extension packages +included, because most Macintosh users do not have access to a development +environment and C compiler. For Mac OS X that bundling is no longer done, +but a new mechanism has been made available to allow easy access to +extension packages. + +The Python Package Manager helps you install additional packages +that enhance Python. It determines the exact MacOS version and Python +version you have and uses that information to download a database that +has packages that are tested and tried on that combination. In other +words: if something is in your Package Manager window but does not work +you are free to blame the database maintainer. + +PackageManager then checks which of the packages you have installed and +which ones are not. This should also work when you have installed packages +outside of PackageManager. You can select packages and install them, +and PackageManager will work out the requirements and install these too. + +Often PackageManager will list a package in two flavors: binary and +source. Binary should always work, source will only work if you have +installed the Apple Developer Tools. PackageManager will warn you about +this, and also about other external dependencies. + +PackageManager is available as a separate application and also as a +function of the IDE, through the File->Package Manager menu entry. 
From jhylton at users.sourceforge.net Sun Oct 16 07:24:32 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/curses textpad.py, 1.7.2.2, 1.7.2.3 Message-ID: <20051016052432.07F1C1E4018@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/curses In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/curses Modified Files: Tag: ast-branch textpad.py Log Message: Merge head to branch (for the last time) Index: textpad.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/curses/textpad.py,v retrieving revision 1.7.2.2 retrieving revision 1.7.2.3 diff -u -d -r1.7.2.2 -r1.7.2.3 --- textpad.py 7 Jan 2005 06:58:14 -0000 1.7.2.2 +++ textpad.py 16 Oct 2005 05:23:59 -0000 1.7.2.3 @@ -53,7 +53,7 @@ last = self.maxx while 1: if ascii.ascii(self.win.inch(y, last)) != ascii.SP: - last = last + 1 + last = min(self.maxx, last+1) break elif last == 0: break From jhylton at users.sourceforge.net Sun Oct 16 07:24:32 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/tut glossary.tex, 1.10.2.1, 1.10.2.2 tut.tex, 1.166.2.2, 1.166.2.3 Message-ID: <20051016052432.EAE431E4013@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/tut In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/tut Modified Files: Tag: ast-branch glossary.tex tut.tex Log Message: Merge head to branch (for the last time) Index: glossary.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/tut/glossary.tex,v retrieving revision 1.10.2.1 retrieving revision 1.10.2.2 diff -u -d -r1.10.2.1 -r1.10.2.2 --- glossary.tex 7 Jan 2005 06:57:38 -0000 1.10.2.1 +++ glossary.tex 16 Oct 2005 05:23:58 -0000 1.10.2.2 @@ -24,9 +24,9 @@ 
\index{byte code} \item[byte code] The internal representation of a Python program in the interpreter. -The byte code is also cached in the \code{.pyc} and \code{.pyo} +The byte code is also cached in \code{.pyc} and \code{.pyo} files so that executing the same file is faster the second time -(compilation from source to byte code can be saved). This +(recompilation from source to byte code can be avoided). This ``intermediate language'' is said to run on a ``virtual machine'' that calls the subroutines corresponding to each bytecode. @@ -37,7 +37,6 @@ \index{coercion} \item[coercion] - The implicit conversion of an instance of one type to another during an operation which involves two arguments of the same type. For example, {}\code{int(3.15)} converts the floating point number to the integer @@ -53,7 +52,6 @@ \index{complex number} \item[complex number] - An extension of the familiar real number system in which all numbers are expressed as a sum of a real part and an imaginary part. Imaginary numbers are real multiples of the imaginary unit (the square root of {}\code{-1}), @@ -85,6 +83,17 @@ can be any object with a \method{__hash__()} function, not just integers starting from zero. Called a hash in Perl. +\index{duck-typing} +\item[duck-typing] +Pythonic programming style that determines an object's type by inspection +of its method or attribute signature rather than by explicit relationship +to some type object ("If it looks like a duck and quacks like a duck, it +must be a duck.") By emphasizing interfaces rather than specific types, +well-designed code improves its flexibility by allowing polymorphic +substitution. Duck-typing avoids tests using \function{type()} or +\function{isinstance()}. Instead, it typically employs +\function{hasattr()} tests or {}\emph{EAFP} programming. + \index{EAFP} \item[EAFP] Easier to ask for forgiveness than permission. 
This common Python @@ -106,7 +115,7 @@ from __future__ import division \end{verbatim} -the expression \code{11/4} would evaluate to \code{2.75}. By actually +the expression \code{11/4} would evaluate to \code{2.75}. By importing the \ulink{\module{__future__}}{../lib/module-future.html} module and evaluating its variables, you can see when a new feature was first added to the language and when it will become the default: @@ -238,6 +247,13 @@ return the same exhausted iterator object used in the previous iteration pass, making it appear like an empty container. +\index{LBYL} +\item[LBYL] +Look before you leap. This coding style explicitly tests for +pre-conditions before making calls or lookups. This style contrasts +with the \emph{EAFP} approach and is characterized by the presence of +many \keyword{if} statements. + \index{list comprehension} \item[list comprehension] A compact way to process all or a subset of elements in a sequence and @@ -247,14 +263,6 @@ The \keyword{if} clause is optional. If omitted, all elements in {}\code{range(256)} are processed. - -\index{LBYL} -\item[LBYL] -Look before you leap. This coding style explicitly tests for -pre-conditions before making calls or lookups. This style contrasts -with the \emph{EAFP} approach and is characterized by the presence of -many \keyword{if} statements. - \index{mapping} \item[mapping] A container object (such as \class{dict}) that supports arbitrary key @@ -282,11 +290,11 @@ \item[namespace] The place where a variable is stored. Namespaces are implemented as dictionaries. There are the local, global and builtin namespaces -as well asnested namespaces in objects (in methods). Namespaces support +as well as nested namespaces in objects (in methods). Namespaces support modularity by preventing naming conflicts. For instance, the functions \function{__builtin__.open()} and \function{os.open()} are distinguished by their namespaces. 
Namespaces also aid readability -and maintainability by making it clear which modules implement a +and maintainability by making it clear which module implements a function. For instance, writing \function{random.seed()} or {}\function{itertools.izip()} makes it clear that those functions are implemented by the \ulink{\module{random}}{../lib/module-random.html} @@ -313,7 +321,7 @@ \index{Python3000} \item[Python3000] -A mythical python release, not required be backward compatible, with +A mythical python release, not required to be backward compatible, with telepathic interface. \index{__slots__} Index: tut.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/tut/tut.tex,v retrieving revision 1.166.2.2 retrieving revision 1.166.2.3 diff -u -d -r1.166.2.2 -r1.166.2.3 --- tut.tex 7 Jan 2005 06:57:38 -0000 1.166.2.2 +++ tut.tex 16 Oct 2005 05:23:58 -0000 1.166.2.3 @@ -75,7 +75,7 @@ If you ever wrote a large shell script, you probably know this feeling: you'd love to add yet another feature, but it's already so slow, and so big, and so complicated; or the feature involves a system -call or other function that is only accessible from C \ldots Usually +call or other function that is only accessible from C\ldots\ Usually the problem at hand isn't serious enough to warrant rewriting the script in C; perhaps the problem requires variable-length strings or other data types (like sorted lists of file names) that are easy in @@ -100,7 +100,7 @@ \emph{Awk} or even \emph{Perl}, yet many things are at least as easy in Python as in those languages. -Python allows you to split up your program in modules that can be +Python allows you to split your program in modules that can be reused in other Python programs. It comes with a large collection of standard modules that you can use as the basis of your programs --- or as examples to start learning to program in Python. 
Some of these @@ -114,7 +114,7 @@ programs, or to test functions during bottom-up program development. It is also a handy desk calculator. -Python allows writing very compact and readable programs. Programs +Python enables programs to be written compactly and readably. Programs written in Python are typically much shorter than equivalent C or \Cpp{} programs, for several reasons: \begin{itemize} @@ -145,7 +145,7 @@ Now that you are all excited about Python, you'll want to examine it in some more detail. Since the best way to learn a language is -using it, you are invited to do so with this tutorial. +to use it, you are invited to do so with this tutorial. In the next chapter, the mechanics of using the interpreter are explained. This is rather mundane information, but essential for @@ -175,6 +175,16 @@ your local Python guru or system administrator. (E.g., \file{/usr/local/python} is a popular alternative location.) +On Windows machines, the Python installation is usually placed in +\file{C:\e Python24}, though you can change this when you're running +the installer. To add this directory to your path, +you can type the following command into the command prompt in a DOS box: + +\begin{verbatim} +set path=%path%;C:\python24 +\end{verbatim} + + Typing an end-of-file character (\kbd{Control-D} on \UNIX, \kbd{Control-Z} on Windows) at the primary prompt causes the interpreter to exit with a zero exit status. If that doesn't work, @@ -283,7 +293,7 @@ unconditionally fatal and cause an exit with a nonzero exit; this applies to internal inconsistencies and some cases of running out of memory. All error messages are written to the standard error stream; -normal output from the executed commands is written to standard +normal output from executed commands is written to standard output. Typing the interrupt character (usually Control-C or DEL) to the @@ -313,7 +323,7 @@ the hash, or pound, character, \character{\#}, is used to start a comment in Python. 
-The script can be given a executable mode, or permission, using the +The script can be given an executable mode, or permission, using the \program{chmod} command: \begin{verbatim} @@ -852,7 +862,7 @@ Unicode has the advantage of providing one ordinal for every character in every script used in modern and ancient texts. Previously, there -were only 256 possible ordinals for script characters and texts were +were only 256 possible ordinals for script characters. Texts were typically bound to a code page which mapped the ordinals to script characters. This led to much confusion especially with respect to internationalization (usually written as \samp{i18n} --- @@ -867,7 +877,7 @@ u'Hello World !' \end{verbatim} -The small \character{u} in front of the quote indicates that an +The small \character{u} in front of the quote indicates that a Unicode string is supposed to be created. If you want to include special characters in the string, you can do so by using the Python \emph{Unicode-Escape} encoding. The following example shows how: @@ -1217,7 +1227,7 @@ \end{verbatim} The given end point is never part of the generated list; -\code{range(10)} generates a list of 10 values, exactly the legal +\code{range(10)} generates a list of 10 values, the legal indices for items of a sequence of length 10. It is possible to let the range start at another number, or to specify a different increment (even negative; sometimes this is called the `step'): @@ -1416,7 +1426,7 @@ same name without causing ambiguity. (It is possible to define your own object types and methods, using \emph{classes}, as discussed later in this tutorial.) -The method \method{append()} shown in the example, is defined for +The method \method{append()} shown in the example is defined for list objects; it adds a new element at the end of the list. In this example it is equivalent to \samp{result = result + [b]}, but more efficient. 
@@ -1511,7 +1521,7 @@ \begin{verbatim} def parrot(voltage, state='a stiff', action='voom', type='Norwegian Blue'): print "-- This parrot wouldn't", action, - print "if you put", voltage, "Volts through it." + print "if you put", voltage, "volts through it." print "-- Lovely plumage, the", type print "-- It's", state, "!" \end{verbatim} @@ -1636,7 +1646,7 @@ \subsection{Lambda Forms \label{lambda}} By popular demand, a few features commonly found in functional -programming languages and Lisp have been added to Python. With the +programming languages like Lisp have been added to Python. With the \keyword{lambda} keyword, small anonymous functions can be created. Here's a function that returns the sum of its two arguments: \samp{lambda a, b: a+b}. Lambda forms can be used wherever function @@ -1743,8 +1753,8 @@ \begin{methoddesc}[list]{pop}{\optional{i}} Remove the item at the given position in the list, and return it. If -no index is specified, \code{a.pop()} returns the last item in the -list. The item is also removed from the list. (The square brackets +no index is specified, \code{a.pop()} removes and returns the last item +in the list. (The square brackets around the \var{i} in the method signature denote that the parameter is optional, not that you should type square brackets at that position. You will see this notation frequently in the @@ -1847,10 +1857,12 @@ There are three built-in functions that are very useful when used with lists: \function{filter()}, \function{map()}, and \function{reduce()}. -\samp{filter(\var{function}, \var{sequence})} returns a sequence (of -the same type, if possible) consisting of those items from the -sequence for which \code{\var{function}(\var{item})} is true. For -example, to compute some primes: +\samp{filter(\var{function}, \var{sequence})} returns a sequence +consisting of those items from the +sequence for which \code{\var{function}(\var{item})} is true. 
+If \var{sequence} is a \class{string} or \class{tuple}, the result will +be of the same type; otherwise, it is always a \class{list}. +For example, to compute some primes: \begin{verbatim} >>> def f(x): return x % 2 != 0 and x % 3 != 0 @@ -1964,7 +1976,7 @@ \end{verbatim} List comprehensions are much more flexible than \function{map()} and can be -applied to functions with more than one argument and to nested functions: +applied to complex expressions and nested functions: \begin{verbatim} >>> [str(round(355/113.0, i)) for i in range(1,6)] @@ -1975,7 +1987,9 @@ \section{The \keyword{del} statement \label{del}} There is a way to remove an item from a list given its index instead -of its value: the \keyword{del} statement. This can also be used to +of its value: the \keyword{del} statement. This differs from the +\method{pop()} method, which returns a value. The \keyword{del} +statement can also be used to remove slices from a list (which we did earlier by assignment of an empty list to the slice). For example: @@ -2024,7 +2038,7 @@ ((12345, 54321, 'hello!'), (1, 2, 3, 4, 5)) \end{verbatim} -As you see, on output tuples are alway enclosed in parentheses, so +As you see, on output tuples are always enclosed in parentheses, so that nested tuples are interpreted correctly; they may be input with or without surrounding parentheses, although often parentheses are necessary anyway (if the tuple is part of a larger expression). @@ -2064,7 +2078,7 @@ \end{verbatim} This is called, appropriately enough, \emph{sequence unpacking}. -Sequence unpacking requires that the list of variables on the left +Sequence unpacking requires the list of variables on the left to have the same number of elements as the length of the sequence. Note that multiple assignment is really just a combination of tuple packing and sequence unpacking! 
@@ -2087,12 +2101,12 @@ \begin{verbatim} >>> basket = ['apple', 'orange', 'apple', 'pear', 'orange', 'banana'] ->>> fruits = set(basket) # create a set without duplicates ->>> fruits +>>> fruit = set(basket) # create a set without duplicates +>>> fruit set(['orange', 'pear', 'apple', 'banana']) ->>> 'orange' in fruits # fast membership testing +>>> 'orange' in fruit # fast membership testing True ->>> 'crabgrass' in fruits +>>> 'crabgrass' in fruit False >>> # Demonstrate set operations on unique letters from two words @@ -2123,9 +2137,9 @@ keys. Tuples can be used as keys if they contain only strings, numbers, or tuples; if a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. You can't use -lists as keys, since lists can be modified in place using their -\method{append()} and \method{extend()} methods, as well as slice and -indexed assignments. +lists as keys, since lists can be modified in place using +index assignments, slice assignments, or methods like +\method{append()} and \method{extend()}. It is best to think of a dictionary as an unordered set of \emph{key: value} pairs, with the requirement that the keys are unique @@ -2146,8 +2160,8 @@ The \method{keys()} method of a dictionary object returns a list of all the keys used in the dictionary, in arbitrary order (if you want it sorted, just apply the \method{sort()} method to the list of keys). To -check whether a single key is in the dictionary, use the -\method{has_key()} method of the dictionary. +check whether a single key is in the dictionary, either use the dictionary's +\method{has_key()} method or the \keyword{in} keyword. 
Here is a small example using a dictionary: @@ -2166,6 +2180,8 @@ ['guido', 'irv', 'jack'] >>> tel.has_key('guido') True +>>> 'guido' in tel +True \end{verbatim} The \function{dict()} constructor builds dictionaries directly from @@ -2183,6 +2199,14 @@ which are even better suited for the task of supplying key-values pairs to the \function{dict()} constructor. +When the keys are simple strings, it is sometimes easier to specify +pairs using keyword arguments: + +\begin{verbatim} +>>> dict(sape=4139, guido=4127, jack=4098) +{'sape': 4139, 'jack': 4098, 'guido': 4127} +\end{verbatim} + \section{Looping Techniques \label{loopidioms}} @@ -2271,7 +2295,7 @@ whether \code{a} is less than \code{b} and moreover \code{b} equals \code{c}. -Comparisons may be combined by the Boolean operators \code{and} and +Comparisons may be combined using the Boolean operators \code{and} and \code{or}, and the outcome of a comparison (or of any other Boolean expression) may be negated with \code{not}. These have lower priorities than comparison operators; between them, \code{not} has @@ -2284,9 +2308,9 @@ left to right, and evaluation stops as soon as the outcome is determined. For example, if \code{A} and \code{C} are true but \code{B} is false, \code{A and B and C} does not evaluate the -expression \code{C}. In general, the return value of a short-circuit -operator, when used as a general value and not as a Boolean, is the -last evaluated argument. +expression \code{C}. When used as a general value and not as a +Boolean, the return value of a short-circuit operator is the last +evaluated argument. It is possible to assign the result of a comparison or other Boolean expression to a variable. For example, @@ -2317,8 +2341,8 @@ equal. If one sequence is an initial sub-sequence of the other, the shorter sequence is the smaller (lesser) one. Lexicographical ordering for strings uses the \ASCII{} ordering for individual -characters. 
Some examples of comparisons between sequences with the -same types: +characters. Some examples of comparisons between sequences of the +same type: \begin{verbatim} (1, 2, 3) < (1, 2, 4) @@ -2599,7 +2623,7 @@ These two variables are only defined if the interpreter is in interactive mode. -The variable \code{sys.path} is a list of strings that determine the +The variable \code{sys.path} is a list of strings that determines the interpreter's search path for modules. It is initialized to a default path taken from the environment variable \envvar{PYTHONPATH}, or from a built-in default if \envvar{PYTHONPATH} is not set. You can modify @@ -2637,10 +2661,10 @@ \begin{verbatim} >>> a = [1, 2, 3, 4, 5] ->>> import fibo, sys +>>> import fibo >>> fib = fibo.fib >>> dir() -['__builtins__', '__doc__', '__file__', '__name__', 'fib', 'fib2'] +['__builtins__', '__doc__', '__file__', '__name__', 'a', 'fib', 'fibo', 'sys'] \end{verbatim} Note that it lists all types of names: variables, modules, functions, etc. @@ -2926,8 +2950,9 @@ One question remains, of course: how do you convert values to strings? Luckily, Python has ways to convert any value to a string: pass it to the \function{repr()} or \function{str()} functions. Reverse quotes -(\code{``}) are equivalent to \function{repr()}, but their use is -discouraged. +(\code{``}) are equivalent to \function{repr()}, but they are no +longer used in modern Python code and will likely not be in future +versions of the language. The \function{str()} function is meant to return representations of values which are fairly human-readable, while \function{repr()} is @@ -3015,7 +3040,7 @@ unchanged; this will mess up your column lay-out but that's usually better than the alternative, which would be lying about a value. (If you really want truncation you can always add a slice operation, as in -\samp{x.ljust(~n)[:n]}.) +\samp{x.ljust(n)[:n]}.) There is another method, \method{zfill()}, which pads a numeric string on the left with zeros. 
It understands about plus and @@ -3103,10 +3128,9 @@ distinction between text and binary files; the end-of-line characters in text files are automatically altered slightly when data is read or written. This behind-the-scenes modification to file data is fine for -\ASCII{} text files, but it'll corrupt binary data like that in JPEGs or -\file{.EXE} files. Be very careful to use binary mode when reading and -writing such files. (Note that the precise semantics of text mode on -the Macintosh depends on the underlying C library being used.) +\ASCII{} text files, but it'll corrupt binary data like that in \file{JPEG} or +\file{EXE} files. Be very careful to use binary mode when reading and +writing such files. \subsection{Methods of File Objects \label{fileMethods}} @@ -3157,6 +3181,21 @@ ['This is the first line of the file.\n', 'Second line of the file\n'] \end{verbatim} +An alternate approach to reading lines is to loop over the file object. +This is memory efficient, fast, and leads to simpler code: + +\begin{verbatim} +>>> for line in f: + print line, + +This is the first line of the file. +Second line of the file +\end{verbatim} + +The alternative approach is simpler but does not provide as fine-grained +control. Since the two approaches manage line buffering differently, +they should not be mixed. + \code{f.write(\var{string})} writes the contents of \var{string} to the file, returning \code{None}. @@ -3333,8 +3372,8 @@ and what caused it. The preceding part of the error message shows the context where the -exception happened, in the form of a stack backtrace. -In general it contains a stack backtrace listing source lines; however, +exception happened, in the form of a stack traceback. +In general it contains a stack traceback listing source lines; however, it will not display lines read from standard input. The \citetitle[../lib/module-exceptions.html]{Python Library @@ -3356,7 +3395,7 @@ ... x = int(raw_input("Please enter a number: ")) ... break ... 
except ValueError: -... print "Oops! That was no valid number. Try again..." +... print "Oops! That was no valid number. Try again..." ... \end{verbatim} @@ -3390,7 +3429,7 @@ be executed. Handlers only handle exceptions that occur in the corresponding try clause, not in other handlers of the same \keyword{try} statement. An except clause may name multiple exceptions -as a parenthesized list, for example: +as a parenthesized tuple, for example: \begin{verbatim} ... except (RuntimeError, TypeError, NameError): @@ -3445,7 +3484,7 @@ the exception's \emph{argument}. The presence and type of the argument depend on the exception type. -The except clause may specify a variable after the exception name (or list). +The except clause may specify a variable after the exception name (or tuple). The variable is bound to an exception instance with the arguments stored in \code{instance.args}. For convenience, the exception instance defines \method{__getitem__} and \method{__str__} so the arguments can @@ -3633,11 +3672,11 @@ The code in the finally clause is useful for releasing external resources (such as files or network connections), regardless of -whether or not the use of the resource was successful. +whether the use of the resource was successful. A \keyword{try} statement must either have one or more except clauses or one finally clause, but not both (because it would be unclear which -clause should be executed). +clause should be executed first). \chapter{Classes \label{classes}} @@ -3650,7 +3689,7 @@ definition.'' The most important features of classes are retained with full power, however: the class inheritance mechanism allows multiple base classes, a derived class can override any methods of its -base class or classes, a method can call the method of a base class with the +base class or classes, and a method can call the method of a base class with the same name. Objects can contain an arbitrary amount of private data. 
In \Cpp{} terminology, all class members (including the data members) are @@ -3772,10 +3811,13 @@ If a name is declared global, then all references and assignments go directly to the middle scope containing the module's global names. -Otherwise, all variables found outside of the innermost scope are read-only. +Otherwise, all variables found outside of the innermost scope are read-only +(an attempt to write to such a variable will simply create a \emph{new} +local variable in the innermost scope, leaving the identically named +outer variable unchanged). Usually, the local scope references the local names of the (textually) -current function. Outside of functions, the local scope references +current function. Outside functions, the local scope references the same namespace as the global scope: the module's namespace. Class definitions place yet another namespace in the local scope. @@ -3839,7 +3881,7 @@ object} is created. This is basically a wrapper around the contents of the namespace created by the class definition; we'll learn more about class objects in the next section. The original local scope -(the one in effect just before the class definitions were entered) is +(the one in effect just before the class definition was entered) is reinstated, and the class object is bound here to the class name given in the class definition header (\class{ClassName} in the example). @@ -3864,7 +3906,7 @@ \end{verbatim} then \code{MyClass.i} and \code{MyClass.f} are valid attribute -references, returning an integer and a method object, respectively. +references, returning an integer and a function object, respectively. Class attributes can also be assigned to, so you can change the value of \code{MyClass.i} by assignment. \member{__doc__} is also a valid attribute, returning the docstring belonging to the class: \code{"A @@ -3882,8 +3924,9 @@ the local variable \code{x}. The instantiation operation (``calling'' a class object) creates an -empty object. 
Many classes like to create objects in a known initial -state. Therefore a class may define a special method named +empty object. Many classes like to create objects with instances +customized to a specific initial state. +Therefore a class may define a special method named \method{__init__()}, like this: \begin{verbatim} @@ -3939,7 +3982,7 @@ del x.counter \end{verbatim} -The other kind of instance attribute references is a \emph{method}. +The other kind of instance attribute reference is a \emph{method}. A method is a function that ``belongs to'' an object. (In Python, the term method is not unique to class instances: other object types can have methods as well. For example, list objects have @@ -3959,13 +4002,13 @@ \subsection{Method Objects \label{methodObjects}} -Usually, a method is called immediately: +Usually, a method is called right after it is bound: \begin{verbatim} x.f() \end{verbatim} -In our example, this will return the string \code{'hello world'}. +In the \class{MyClass} example, this will return the string \code{'hello world'}. However, it is not necessary to call a method right away: \code{x.f} is a method object, and can be stored away and called at a later time. For example: @@ -4043,7 +4086,7 @@ variables and instance variables when glancing through a method. -Conventionally, the first argument of a method is often called +Often, the first argument of a method is called \code{self}. This is nothing more than a convention: the name \code{self} has absolutely no special meaning to Python. (Note, however, that by not following the convention your code may be less @@ -4107,7 +4150,7 @@ Of course, a language feature would not be worthy of the name ``class'' without supporting inheritance. 
The syntax for a derived class -definition looks as follows: +definition looks like this: \begin{verbatim} class DerivedClassName(BaseClassName): @@ -4119,9 +4162,9 @@ \end{verbatim} The name \class{BaseClassName} must be defined in a scope containing -the derived class definition. Instead of a base class name, an -expression is also allowed. This is useful when the base class is -defined in another module, +the derived class definition. In place of a base class name, other +arbitrary expressions are also allowed. This can be useful, for +example, when the base class is defined in another module: \begin{verbatim} class DerivedClassName(modname.BaseClassName): @@ -4130,7 +4173,7 @@ Execution of a derived class definition proceeds the same as for a base class. When the class object is constructed, the base class is remembered. This is used for resolving attribute references: if a -requested attribute is not found in the class, it is searched in the +requested attribute is not found in the class, the search proceeds to look in the base class. This rule is applied recursively if the base class itself is derived from some other class. @@ -4143,7 +4186,7 @@ Derived classes may override methods of their base classes. Because methods have no special privileges when calling other methods of the same object, a method of a base class that calls another method -defined in the same base class, may in fact end up calling a method of +defined in the same base class may end up calling a method of a derived class that overrides it. (For \Cpp{} programmers: all methods in Python are effectively \keyword{virtual}.) @@ -4158,7 +4201,7 @@ \subsection{Multiple Inheritance \label{multiple}} Python supports a limited form of multiple inheritance as well. 
A -class definition with multiple base classes looks as follows: +class definition with multiple base classes looks like this: \begin{verbatim} class DerivedClassName(Base1, Base2, Base3): @@ -4317,15 +4360,15 @@ \samp{except B} first), it would have printed B, B, B --- the first matching except clause is triggered. -When an error message is printed for an unhandled exception which is a -class, the class name is printed, then a colon and a space, and +When an error message is printed for an unhandled exception, the +exception's class name is printed, then a colon and a space, and finally the instance converted to a string using the built-in function \function{str()}. \section{Iterators\label{iterators}} -By now, you've probably noticed that most container objects can be looped +By now you have probably noticed that most container objects can be looped over using a \keyword{for} statement: \begin{verbatim} @@ -4364,7 +4407,7 @@ >>> it.next() Traceback (most recent call last): - File "", line 1, in -toplevel- + File "", line 1, in ? it.next() StopIteration \end{verbatim} @@ -4701,7 +4744,7 @@ \section{Performance Measurement\label{performance-measurement}} Some Python users develop a deep interest in knowing the relative -performance between different approaches to the same problem. +performance of different approaches to the same problem. Python provides a measurement tool that answers those questions immediately. @@ -4781,7 +4824,7 @@ Despite the modules names, no direct knowledge or handling of XML is needed. \item The \ulink{\module{email}}{../lib/module-email.html} package is a library for managing email messages, including MIME and other RFC 2822-based message - documents. Unlike \module{smptlib} and \module{poplib} which actually send + documents. 
Unlike \module{smtplib} and \module{poplib} which actually send and receive messages, the email package has a complete toolset for building or decoding complex message structures (including attachments) and for implementing internet encoding and header protocols. @@ -4865,7 +4908,7 @@ >>> locale.format("%d", x, grouping=True) '1,234,567' >>> locale.format("%s%.*f", (conv['currency_symbol'], - ... conv['int_frac_digits'], x), grouping=True) + ... conv['frac_digits'], x), grouping=True) '$1,234,567.80' \end{verbatim} @@ -4932,8 +4975,8 @@ \end{verbatim} Another application for templating is separating program logic from the -details of multiple output formats. The makes it possible to substitute -custom templates for XML files, plain text reports, and HMTL web reports. +details of multiple output formats. This makes it possible to substitute +custom templates for XML files, plain text reports, and HTML web reports. \section{Working with Binary Data Record Layouts\label{binary-formats}} @@ -4953,7 +4996,7 @@ for i in range(3): # show the first 3 file headers start += 14 fields = struct.unpack('LLLHH', data[start:start+16]) - crc32, comp_size, uncomp_size, filenamesize, extra_size = fields + crc32, comp_size, uncomp_size, filenamesize, extra_size = fields start += 16 filename = data[start:start+filenamesize] @@ -5007,7 +5050,7 @@ While those tools are powerful, minor design errors can result in problems that are difficult to reproduce. So, the preferred approach to task coordination is to concentrate all access to a resource -in a single thread and then using the +in a single thread and then use the \ulink{\module{Queue}}{../lib/module-Queue.html} module to feed that thread with requests from other threads. Applications using \class{Queue} objects for inter-thread communication and coordination @@ -5187,7 +5230,7 @@ \end{verbatim} The \class{Decimal} result keeps a trailing zero, automatically inferring four -place significance from the two digit multiplicands. 
Decimal reproduces +place significance from multiplicands with two place significance. Decimal reproduces mathematics as done by hand and avoids issues that can arise when binary floating point cannot exactly represent decimal quantities. @@ -5222,29 +5265,61 @@ Reading this tutorial has probably reinforced your interest in using Python --- you should be eager to apply Python to solving your -real-world problems. Now what should you do? +real-world problems. Where should you go to learn more? -You should read, or at least page through, the -\citetitle[../lib/lib.html]{Python Library Reference}, -which gives complete (though terse) reference material about types, -functions, and modules that can save you a lot of time when writing -Python programs. The standard Python distribution includes a -\emph{lot} of code in both C and Python; there are modules to read -\UNIX{} mailboxes, retrieve documents via HTTP, generate random -numbers, parse command-line options, write CGI programs, compress -data, and a lot more; skimming through the Library Reference will give -you an idea of what's available. +This tutorial is part of Python's documentation set. +Some other documents in the set are: -The major Python Web site is \url{http://www.python.org/}; it contains +\begin{itemize} + +\item \citetitle[../lib/lib.html]{Python Library Reference}: + +You should browse through this manual, which gives complete (though +terse) reference material about types, functions, and the modules in +the standard library. The standard Python distribution includes a +\emph{lot} of additional code. There are modules to read \UNIX{} +mailboxes, retrieve documents via HTTP, generate random numbers, parse +command-line options, write CGI programs, compress data, and many other tasks. +Skimming through the Library Reference will give you an idea of +what's available. 
+ +\item \citetitle[../inst/inst.html]{Installing Python Modules} +explains how to install external modules written by other Python +users. + +\item \citetitle[../ref/ref.html]{Language Reference}: A detailed +explanation of Python's syntax and semantics. It's heavy reading, +but is useful as a complete guide to the language itself. + +\end{itemize} + +More Python resources: + +\begin{itemize} + +\item \url{http://www.python.org}: The major Python Web site. It contains code, documentation, and pointers to Python-related pages around the Web. This Web site is mirrored in various places around the world, such as Europe, Japan, and Australia; a mirror may be faster -than the main site, depending on your geographical location. A more -informal site is \url{http://starship.python.net/}, which contains a -bunch of Python-related personal home pages; many people have -downloadable software there. Many more user-created Python modules -can be found in the \ulink{Python Package -Index}{http://www.python.org/pypi} (PyPI). +than the main site, depending on your geographical location. + +\item \url{http://docs.python.org}: Fast access to Python's +documentation. + +\item \url{http://cheeseshop.python.org}: +The Python Package Index, nicknamed the Cheese Shop, +is an index of user-created Python modules that are available for +download. Once you begin releasing code, you can register it +here so that others can find it. + +\item \url{http://aspn.activestate.com/ASPN/Python/Cookbook/}: The +Python Cookbook is a sizable collection of code examples, larger +modules, and useful scripts. Particularly notable contributions are +collected in a book also titled \citetitle{Python Cookbook} (O'Reilly +\& Associates, ISBN 0-596-00797-3.) + +\end{itemize} + For Python-related questions and problem reports, you can post to the newsgroup \newsgroup{comp.lang.python}, or send them to the mailing @@ -5259,7 +5334,7 @@ announcing new modules. 
Before posting, be sure to check the list of \ulink{Frequently Asked Questions}{http://www.python.org/doc/faq/} (also called the FAQ), or look for it in the \file{Misc/} directory of the Python source distribution. Mailing -list archives are available at \url{http://www.python.org/pipermail/}. +list archives are available at \url{http://mail.python.org/pipermail/}. The FAQ answers many of the questions that come up again and again, and may already contain the solution for your problem. @@ -5275,7 +5350,7 @@ editing. This library has its own documentation which I won't duplicate here; however, the basics are easily explained. The interactive editing and history described here are optionally -available in the \UNIX{} and CygWin versions of the interpreter. +available in the \UNIX{} and Cygwin versions of the interpreter. This chapter does \emph{not} document the editing facilities of Mark Hammond's PythonWin package or the Tk-based environment, IDLE, @@ -5507,7 +5582,7 @@ 0.1000000000000000055511151231257827021181583404541015625 \end{verbatim} -instead! The Python prompt (implicitly) uses the builtin +instead! The Python prompt uses the builtin \function{repr()} function to obtain a string version of everything it displays. For floats, \code{repr(\var{float})} rounds the true decimal value to 17 significant digits, giving @@ -5522,7 +5597,7 @@ \var{x}, but rounding to 16 digits is not enough to make that true. Note that this is in the very nature of binary floating-point: this is -not a bug in Python, it is not a bug in your code either. You'll +not a bug in Python, and it is not a bug in your code either. You'll see the same kind of thing in all languages that support your hardware's floating-point arithmetic (although some languages may not \emph{display} the difference by default, or in all output modes). @@ -5561,8 +5636,8 @@ to round it again can't make it better: it was already as good as it gets. 
-Another consequence is that since 0.1 is not exactly 1/10, adding 0.1 -to itself 10 times may not yield exactly 1.0, either: +Another consequence is that since 0.1 is not exactly 1/10, +summing ten values of 0.1 may not yield exactly 1.0, either: \begin{verbatim} >>> sum = 0.0 @@ -5603,7 +5678,7 @@ you can perform an exact analysis of cases like this yourself. Basic familiarity with binary floating-point representation is assumed. -\dfn{Representation error} refers to that some (most, actually) +\dfn{Representation error} refers to the fact that some (most, actually) decimal fractions cannot be represented exactly as binary (base 2) fractions. This is the chief reason why Python (or Perl, C, \Cpp, Java, Fortran, and many others) often won't display the exact decimal @@ -5638,9 +5713,9 @@ \begin{verbatim} >>> 2**52 4503599627370496L ->>> 2L**53 +>>> 2**53 9007199254740992L ->>> 2L**56/10 +>>> 2**56/10 7205759403792793L \end{verbatim} @@ -5649,7 +5724,7 @@ quotient rounded: \begin{verbatim} ->>> q, r = divmod(2L**56, 10) +>>> q, r = divmod(2**56, 10) >>> r 6L \end{verbatim} @@ -5677,7 +5752,7 @@ fraction given above, the best 754 double approximation it can get: \begin{verbatim} ->>> .1 * 2L**56 +>>> .1 * 2**56 7205759403792794.0 \end{verbatim} @@ -5685,7 +5760,7 @@ value of its 30 most significant decimal digits: \begin{verbatim} ->>> 7205759403792794L * 10L**30 / 2L**56 +>>> 7205759403792794 * 10**30 / 2**56 100000000000000005551115123125L \end{verbatim} From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include abstract.h, 2.44.2.2, 2.44.2.3 ceval.h, 2.44.18.2, 2.44.18.3 errcode.h, 2.14.28.1, 2.14.28.2 floatobject.h, 2.20.14.1, 2.20.14.2 graminit.h, 2.19.2.2, 2.19.2.3 import.h, 2.27.28.2, 2.27.28.3 marshal.h, 2.11.26.2, 2.11.26.3 modsupport.h, 2.38.10.2, 2.38.10.3 object.h, 2.101.2.2, 2.101.2.3 
objimpl.h, 2.55.2.2, 2.55.2.3 pyerrors.h, 2.54.2.3, 2.54.2.4 pyport.h, 2.51.2.3, 2.51.2.4 pystate.h, 2.18.18.2, 2.18.18.3 pythonrun.h, 2.49.2.8, 2.49.2.9 setobject.h, 2.5.4.1, 2.5.4.2 structmember.h, 2.19.8.2, 2.19.8.3 unicodeobject.h, 2.38.2.2, 2.38.2.3 Message-ID: <20051016052433.14E6A1E400D@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Include Modified Files: Tag: ast-branch abstract.h ceval.h errcode.h floatobject.h graminit.h import.h marshal.h modsupport.h object.h objimpl.h pyerrors.h pyport.h pystate.h pythonrun.h setobject.h structmember.h unicodeobject.h Log Message: Merge head to branch (for the last time) Index: abstract.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/abstract.h,v retrieving revision 2.44.2.2 retrieving revision 2.44.2.3 diff -u -d -r2.44.2.2 -r2.44.2.3 --- abstract.h 7 Jan 2005 06:57:41 -0000 2.44.2.2 +++ abstract.h 16 Oct 2005 05:23:59 -0000 2.44.2.3 @@ -422,6 +422,21 @@ PyAPI_FUNC(int) PyObject_Length(PyObject *o); #define PyObject_Length PyObject_Size + PyAPI_FUNC(int) _PyObject_LengthCue(PyObject *o); + + /* + Return the size of object o. If the object, o, provides + both sequence and mapping protocols, the sequence size is + returned. On error, -1 is returned. If the object provides + a _length_cue() method, its value is returned. 
This is the + equivalent to the Python expression: + try: + return len(o) + except (AttributeError, TypeError): + if hasattr(o, '_length_cue'): + return o._length_cue() + raise + */ PyAPI_FUNC(PyObject *) PyObject_GetItem(PyObject *o, PyObject *key); Index: ceval.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/ceval.h,v retrieving revision 2.44.18.2 retrieving revision 2.44.18.3 diff -u -d -r2.44.18.2 -r2.44.18.3 --- ceval.h 7 Jan 2005 06:57:41 -0000 2.44.18.2 +++ ceval.h 16 Oct 2005 05:23:59 -0000 2.44.18.3 @@ -65,6 +65,7 @@ PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *); PyAPI_FUNC(PyObject *) PyEval_EvalFrame(struct _frame *); +PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(struct _frame *f, int exc); /* this used to be handled on a per-thread basis - now just two globals */ PyAPI_DATA(volatile int) _Py_Ticker; Index: errcode.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/errcode.h,v retrieving revision 2.14.28.1 retrieving revision 2.14.28.2 diff -u -d -r2.14.28.1 -r2.14.28.2 --- errcode.h 28 Apr 2003 17:32:58 -0000 2.14.28.1 +++ errcode.h 16 Oct 2005 05:23:59 -0000 2.14.28.2 @@ -28,6 +28,7 @@ #define E_DECODE 22 /* Error in decoding into Unicode */ #define E_EOFS 23 /* EOF in triple-quoted string */ #define E_EOLS 24 /* EOL in single-quoted string */ +#define E_LINECONT 25 /* Unexpected characters after a line continuation */ #ifdef __cplusplus } Index: floatobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/floatobject.h,v retrieving revision 2.20.14.1 retrieving revision 2.20.14.2 diff -u -d -r2.20.14.1 -r2.20.14.2 --- floatobject.h 28 Apr 2003 17:32:58 -0000 2.20.14.1 +++ floatobject.h 16 Oct 2005 05:23:59 -0000 2.20.14.2 @@ -55,13 +55,18 @@ * routines produce a C double from such a string. 
The suffix (4 or 8) * specifies the number of bytes in the string. * - * Excepting NaNs and infinities (which aren't handled correctly), the 4- - * byte format is identical to the IEEE-754 single precision format, and - * the 8-byte format to the IEEE-754 double precision format. On non- - * IEEE platforms with more precision, or larger dynamic range, than - * 754 supports, not all values can be packed; on non-IEEE platforms with - * less precision, or smaller dynamic range, not all values can be - * unpacked. What happens in such cases is partly accidental (alas). + * On platforms that appear to use (see _PyFloat_Init()) IEEE-754 formats + * these functions work by copying bits. On other platforms, the formats the + * 4- byte format is identical to the IEEE-754 single precision format, and + * the 8-byte format to the IEEE-754 double precision format, although the + * packing of INFs and NaNs (if such things exist on the platform) isn't + * handled correctly, and attempting to unpack a string containing an IEEE + * INF or NaN will raise an exception. + * + * On non-IEEE platforms with more precision, or larger dynamic range, than + * 754 supports, not all values can be packed; on non-IEEE platforms with less + * precision, or smaller dynamic range, not all values can be unpacked. What + * happens in such cases is partly accidental (alas). */ /* The pack routines write 4 or 8 bytes, starting at p. le is a bool @@ -70,8 +75,9 @@ * first, at p). * Return value: 0 if all is OK, -1 if error (and an exception is * set, most likely OverflowError). - * Bug: What this does is undefined if x is a NaN or infinity. - * Bug: -0.0 and +0.0 produce the same string. + * There are two problems on non-IEEE platforms: + * 1): What this does is undefined if x is a NaN or infinity. + * 2): -0.0 and +0.0 produce the same string. 
*/ PyAPI_FUNC(int) _PyFloat_Pack4(double x, unsigned char *p, int le); PyAPI_FUNC(int) _PyFloat_Pack8(double x, unsigned char *p, int le); @@ -81,9 +87,8 @@ * last, at p+3 or p+7), false if big-endian (exponent first, at p). * Return value: The unpacked double. On error, this is -1.0 and * PyErr_Occurred() is true (and an exception is set, most likely - * OverflowError). - * Bug: What this does is undefined if the string represents a NaN or - * infinity. + * OverflowError). Note that on a non-IEEE platform this will refuse + * to unpack a string that represents a NaN or infinity. */ PyAPI_FUNC(double) _PyFloat_Unpack4(const unsigned char *p, int le); PyAPI_FUNC(double) _PyFloat_Unpack8(const unsigned char *p, int le); Index: graminit.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/graminit.h,v retrieving revision 2.19.2.2 retrieving revision 2.19.2.3 diff -u -d -r2.19.2.2 -r2.19.2.3 --- graminit.h 7 Jan 2005 06:57:41 -0000 2.19.2.2 +++ graminit.h 16 Oct 2005 05:23:59 -0000 2.19.2.3 @@ -76,3 +76,4 @@ #define gen_if 331 #define testlist1 332 #define encoding_decl 333 +#define yield_expr 334 Index: import.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/import.h,v retrieving revision 2.27.28.2 retrieving revision 2.27.28.3 diff -u -d -r2.27.28.2 -r2.27.28.3 --- import.h 7 Jan 2005 06:57:41 -0000 2.27.28.2 +++ import.h 16 Oct 2005 05:23:59 -0000 2.27.28.3 @@ -24,6 +24,7 @@ PyAPI_FUNC(struct filedescr *) _PyImport_FindModule( const char *, PyObject *, char *, size_t, FILE **, PyObject **); PyAPI_FUNC(int) _PyImport_IsScript(struct filedescr *); +PyAPI_FUNC(void) _PyImport_ReInitLock(void); PyAPI_FUNC(PyObject *)_PyImport_FindExtension(char *, char *); PyAPI_FUNC(PyObject *)_PyImport_FixupExtension(char *, char *); Index: marshal.h =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Include/marshal.h,v retrieving revision 2.11.26.2 retrieving revision 2.11.26.3 diff -u -d -r2.11.26.2 -r2.11.26.3 --- marshal.h 7 Jan 2005 06:57:42 -0000 2.11.26.2 +++ marshal.h 16 Oct 2005 05:23:59 -0000 2.11.26.3 @@ -7,7 +7,7 @@ extern "C" { #endif -#define Py_MARSHAL_VERSION 1 +#define Py_MARSHAL_VERSION 2 PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int); PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int); Index: modsupport.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/modsupport.h,v retrieving revision 2.38.10.2 retrieving revision 2.38.10.3 diff -u -d -r2.38.10.2 -r2.38.10.3 --- modsupport.h 7 Jan 2005 06:57:42 -0000 2.38.10.2 +++ modsupport.h 16 Oct 2005 05:23:59 -0000 2.38.10.3 @@ -15,6 +15,7 @@ char *, char **, ...); PyAPI_FUNC(int) PyArg_UnpackTuple(PyObject *, char *, int, int, ...); PyAPI_FUNC(PyObject *) Py_BuildValue(char *, ...); +PyAPI_FUNC(int) _PyArg_NoKeywords(char *funcname, PyObject *kw); PyAPI_FUNC(int) PyArg_VaParse(PyObject *, char *, va_list); PyAPI_FUNC(int) PyArg_VaParseTupleAndKeywords(PyObject *, PyObject *, Index: object.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/object.h,v retrieving revision 2.101.2.2 retrieving revision 2.101.2.3 diff -u -d -r2.101.2.2 -r2.101.2.3 --- object.h 7 Jan 2005 06:57:42 -0000 2.101.2.2 +++ object.h 16 Oct 2005 05:23:59 -0000 2.101.2.3 @@ -371,6 +371,7 @@ PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); PyAPI_FUNC(void) _PyObject_Dump(PyObject *); PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_Str(PyObject *); PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); #ifdef Py_USING_UNICODE PyAPI_FUNC(PyObject *) PyObject_Unicode(PyObject *); Index: objimpl.h =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Include/objimpl.h,v retrieving revision 2.55.2.2 retrieving revision 2.55.2.3 diff -u -d -r2.55.2.2 -r2.55.2.3 --- objimpl.h 7 Jan 2005 06:57:42 -0000 2.55.2.2 +++ objimpl.h 16 Oct 2005 05:23:59 -0000 2.55.2.3 @@ -124,7 +124,7 @@ #else /* ! WITH_PYMALLOC */ #define PyObject_MALLOC PyMem_MALLOC #define PyObject_REALLOC PyMem_REALLOC -/* This is an odd one! For backward compatability with old extensions, the +/* This is an odd one! For backward compatibility with old extensions, the PyMem "release memory" functions have to invoke the object allocator's free() function. When pymalloc isn't enabled, that leaves us using the platform free(). */ Index: pyerrors.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pyerrors.h,v retrieving revision 2.54.2.3 retrieving revision 2.54.2.4 diff -u -d -r2.54.2.3 -r2.54.2.4 --- pyerrors.h 7 Jan 2005 06:57:42 -0000 2.54.2.3 +++ pyerrors.h 16 Oct 2005 05:23:59 -0000 2.54.2.4 @@ -15,6 +15,12 @@ PyAPI_FUNC(void) PyErr_Fetch(PyObject **, PyObject **, PyObject **); PyAPI_FUNC(void) PyErr_Restore(PyObject *, PyObject *, PyObject *); +#ifdef Py_DEBUG +#define _PyErr_OCCURRED() PyErr_Occurred() +#else +#define _PyErr_OCCURRED() (_PyThreadState_Current->curexc_type) +#endif + /* Error testing and normalization */ PyAPI_FUNC(int) PyErr_GivenExceptionMatches(PyObject *, PyObject *); PyAPI_FUNC(int) PyErr_ExceptionMatches(PyObject *); @@ -25,6 +31,7 @@ PyAPI_DATA(PyObject *) PyExc_Exception; PyAPI_DATA(PyObject *) PyExc_StopIteration; +PyAPI_DATA(PyObject *) PyExc_GeneratorExit; PyAPI_DATA(PyObject *) PyExc_StandardError; PyAPI_DATA(PyObject *) PyExc_ArithmeticError; PyAPI_DATA(PyObject *) PyExc_LookupError; Index: pyport.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pyport.h,v retrieving revision 2.51.2.3 retrieving revision 2.51.2.4 diff -u -d -r2.51.2.3 -r2.51.2.4 --- 
pyport.h 7 Jan 2005 06:57:42 -0000 2.51.2.3 +++ pyport.h 16 Oct 2005 05:23:59 -0000 2.51.2.4 @@ -297,7 +297,7 @@ * This isn't reliable. See Py_OVERFLOWED comments. * X is evaluated more than once. */ -#if defined(__FreeBSD__) || defined(__OpenBSD__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__hpux) && defined(__ia64)) #define _Py_SET_EDOM_FOR_NAN(X) if (isnan(X)) errno = EDOM; #else #define _Py_SET_EDOM_FOR_NAN(X) ; Index: pystate.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pystate.h,v retrieving revision 2.18.18.2 retrieving revision 2.18.18.3 diff -u -d -r2.18.18.2 -r2.18.18.3 --- pystate.h 7 Jan 2005 06:57:42 -0000 2.18.18.2 +++ pystate.h 16 Oct 2005 05:23:59 -0000 2.18.18.3 @@ -53,12 +53,16 @@ #define PyTrace_C_RETURN 6 typedef struct _ts { + /* See Python/ceval.c for comments explaining most fields */ struct _ts *next; PyInterpreterState *interp; struct _frame *frame; int recursion_depth; + /* 'tracing' keeps track of the execution depth when tracing/profiling. + This is to prevent the actual trace/profile code from being recorded in + the trace/profile. */ int tracing; int use_tracing; @@ -75,7 +79,7 @@ PyObject *exc_value; PyObject *exc_traceback; - PyObject *dict; + PyObject *dict; /* Stores per-thread state */ /* tick_counter is incremented whenever the check_interval ticker * reaches zero. 
The purpose is to give a useful measure of the number Index: pythonrun.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pythonrun.h,v retrieving revision 2.49.2.8 retrieving revision 2.49.2.9 diff -u -d -r2.49.2.8 -r2.49.2.9 --- pythonrun.h 11 Oct 2005 22:03:13 -0000 2.49.2.8 +++ pythonrun.h 16 Oct 2005 05:23:59 -0000 2.49.2.9 @@ -115,6 +115,7 @@ PyAPI_FUNC(void) _PyImportHooks_Init(void); PyAPI_FUNC(int) _PyFrame_Init(void); PyAPI_FUNC(int) _PyInt_Init(void); +PyAPI_FUNC(void) _PyFloat_Init(void); /* Various internal finalizers */ PyAPI_FUNC(void) _PyExc_Fini(void); @@ -124,6 +125,7 @@ PyAPI_FUNC(void) PyCFunction_Fini(void); PyAPI_FUNC(void) PyTuple_Fini(void); PyAPI_FUNC(void) PyList_Fini(void); +PyAPI_FUNC(void) PySet_Fini(void); PyAPI_FUNC(void) PyString_Fini(void); PyAPI_FUNC(void) PyInt_Fini(void); PyAPI_FUNC(void) PyFloat_Fini(void); Index: setobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/setobject.h,v retrieving revision 2.5.4.1 retrieving revision 2.5.4.2 diff -u -d -r2.5.4.1 -r2.5.4.2 --- setobject.h 7 Jan 2005 06:57:42 -0000 2.5.4.1 +++ setobject.h 16 Oct 2005 05:23:59 -0000 2.5.4.2 @@ -1,4 +1,3 @@ - /* Set object interface */ #ifndef Py_SETOBJECT_H @@ -7,34 +6,83 @@ extern "C" { #endif + /* -This data structure is shared by set and frozenset objects. +There are three kinds of slots in the table: + +1. Unused: key == NULL +2. Active: key != NULL and key != dummy +3. Dummy: key == dummy + +Note: .pop() abuses the hash field of an Unused or Dummy slot to +hold a search finger. The hash field of Unused or Dummy slots has +no meaning otherwise. */ +#define PySet_MINSIZE 8 + typedef struct { + long hash; /* cached hash code for the entry key */ + PyObject *key; +} setentry; + + +/* +This data structure is shared by set and frozenset objects. 
+*/ + +typedef struct _setobject PySetObject; +struct _setobject { PyObject_HEAD - PyObject *data; - long hash; /* only used by frozenset objects */ - PyObject *weakreflist; /* List of weak references */ - /* Invariants: - * data is a dictionary whose values are all True. - * data points to the same dict for the whole life of the set. - * For frozensets only: - * data is immutable. - * hash is the hash of the frozenset or -1 if not computed yet. + int fill; /* # Active + # Dummy */ + int used; /* # Active */ + + /* The table contains mask + 1 slots, and that's a power of 2. + * We store the mask instead of the size because the mask is more + * frequently needed. */ -} PySetObject; + int mask; + + /* table points to smalltable for small tables, else to + * additional malloc'ed memory. table is never NULL! This rule + * saves repeated runtime null-tests. + */ + setentry *table; + setentry *(*lookup)(PySetObject *so, PyObject *key, long hash); + setentry smalltable[PySet_MINSIZE]; + + long hash; /* only used by frozenset objects */ + PyObject *weakreflist; /* List of weak references */ +}; PyAPI_DATA(PyTypeObject) PySet_Type; PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; +/* Invariants for frozensets: + * data is immutable. + * hash is the hash of the frozenset or -1 if not computed yet. 
+ * Invariants for sets: + * hash is -1 + */ + #define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type) +#define PyAnySet_CheckExact(ob) \ + ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type) #define PyAnySet_Check(ob) \ ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \ PyType_IsSubtype((ob)->ob_type, &PySet_Type) || \ PyType_IsSubtype((ob)->ob_type, &PyFrozenSet_Type)) +PyAPI_FUNC(PyObject *) PySet_New(PyObject *); +PyAPI_FUNC(PyObject *) PyFrozenSet_New(PyObject *); +PyAPI_FUNC(int) PySet_Size(PyObject *anyset); +#define PySet_GET_SIZE(so) (((PySetObject *)(so))->used) +PyAPI_FUNC(int) PySet_Contains(PyObject *anyset, PyObject *key); +PyAPI_FUNC(int) PySet_Discard(PyObject *set, PyObject *key); +PyAPI_FUNC(int) PySet_Add(PyObject *set, PyObject *key); +PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set); + #ifdef __cplusplus } #endif Index: structmember.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/structmember.h,v retrieving revision 2.19.8.2 retrieving revision 2.19.8.3 diff -u -d -r2.19.8.2 -r2.19.8.3 --- structmember.h 7 Jan 2005 06:57:42 -0000 2.19.8.2 +++ structmember.h 16 Oct 2005 05:23:59 -0000 2.19.8.3 @@ -65,6 +65,10 @@ #define T_OBJECT_EX 16 /* Like T_OBJECT, but raises AttributeError when the value is NULL, instead of converting to None. 
*/ +#ifdef HAVE_LONG_LONG +#define T_LONGLONG 17 +#define T_ULONGLONG 18 +#endif /* HAVE_LONG_LONG */ /* Flags */ #define READONLY 1 Index: unicodeobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/unicodeobject.h,v retrieving revision 2.38.2.2 retrieving revision 2.38.2.3 diff -u -d -r2.38.2.2 -r2.38.2.3 --- unicodeobject.h 7 Jan 2005 06:57:43 -0000 2.38.2.2 +++ unicodeobject.h 16 Oct 2005 05:23:59 -0000 2.38.2.3 @@ -797,6 +797,16 @@ int length /* Number of Py_UNICODE chars to encode */ ); +/* --- Unicode Internal Codec --------------------------------------------- + + Only for internal use in _codecsmodule.c */ + +PyObject *_PyUnicode_DecodeUnicodeInternal( + const char *string, + int length, + const char *errors + ); + /* --- Latin-1 Codecs ----------------------------------------------------- Note: Latin-1 corresponds to the first 256 Unicode ordinals. From jhylton at users.sourceforge.net Sun Oct 16 07:24:32 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src LICENSE, 1.22.2.2, 1.22.2.3 Makefile.pre.in, 1.86.2.6, 1.86.2.7 README, 1.148.2.2, 1.148.2.3 configure, 1.317.2.2, 1.317.2.3 configure.in, 1.327.2.2, 1.327.2.3 pyconfig.h.in, 1.42.2.2, 1.42.2.3 setup.py, 1.98.2.2, 1.98.2.3 Message-ID: <20051016052432.D10331E4017@bag.python.org> Update of /cvsroot/python/python/dist/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718 Modified Files: Tag: ast-branch LICENSE Makefile.pre.in README configure configure.in pyconfig.h.in setup.py Log Message: Merge head to branch (for the last time) Index: LICENSE =================================================================== RCS file: /cvsroot/python/python/dist/src/LICENSE,v retrieving revision 1.22.2.2 retrieving revision 1.22.2.3 diff -u -d -r1.22.2.2 -r1.22.2.3 --- LICENSE 7 Jan 2005 06:56:20 -0000 1.22.2.2 +++ LICENSE 16 
Oct 2005 05:23:54 -0000 1.22.2.3 @@ -47,7 +47,9 @@ 2.3.2 2.3.1 2002-2003 PSF yes 2.3.3 2.3.2 2002-2003 PSF yes 2.3.4 2.3.3 2004 PSF yes + 2.3.5 2.3.4 2005 PSF yes 2.4 2.3 2004 PSF yes + 2.4.1 2.4 2005 PSF yes Footnotes: @@ -83,9 +85,9 @@ prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) -2001, 2002, 2003, 2004 Python Software Foundation; All Rights Reserved" -are retained in Python alone or in any derivative version prepared -by Licensee. +2001, 2002, 2003, 2004, 2005 Python Software Foundation; All Rights +Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make Index: Makefile.pre.in =================================================================== RCS file: /cvsroot/python/python/dist/src/Makefile.pre.in,v retrieving revision 1.86.2.6 retrieving revision 1.86.2.7 diff -u -d -r1.86.2.6 -r1.86.2.7 --- Makefile.pre.in 20 Mar 2005 23:26:45 -0000 1.86.2.6 +++ Makefile.pre.in 16 Oct 2005 05:23:54 -0000 1.86.2.7 @@ -55,7 +55,7 @@ # Compiler options OPT= @OPT@ BASECFLAGS= @BASECFLAGS@ -CFLAGS= $(BASECFLAGS) $(OPT) +CFLAGS= $(BASECFLAGS) $(OPT) $(EXTRA_CFLAGS) # Both CPPFLAGS and LDFLAGS need to contain the shell's value for setup.py to # be able to build extension modules using the directories specified in the # environment variables @@ -113,8 +113,8 @@ # Deployment target selected during configure, to be checked # by distutils CONFIGURE_MACOSX_DEPLOYMENT_TARGET=@CONFIGURE_MACOSX_DEPLOYMENT_TARGET@ -# Options to enable prebinding (for fast startup) -OTHER_LIBTOOL_OPT = -prebind -seg1addr 0x10000000 +# Options to enable prebinding (for fast startup prior to Mac OS X 10.3) +OTHER_LIBTOOL_OPT=@OTHER_LIBTOOL_OPT@ # Environment to run shared python without 
installed libraries RUNSHARED= @RUNSHARED@ @@ -387,7 +387,7 @@ $(RESSRCDIR)/English.lproj/InfoPlist.strings $(INSTALL) -d -m $(DIRMODE) $(PYTHONFRAMEWORKDIR)/Versions/$(VERSION) libtool -o $(LDLIBRARY) -dynamic $(OTHER_LIBTOOL_OPT) $(LIBRARY) \ - -framework System @LIBTOOL_CRUFT@ + @LIBTOOL_CRUFT@ $(INSTALL) -d -m $(DIRMODE) \ $(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/Resources/English.lproj $(INSTALL_DATA) $(RESSRCDIR)/Info.plist \ @@ -682,7 +682,8 @@ test/decimaltestdata \ encodings email email/test email/test/data compiler hotshot \ logging bsddb bsddb/test csv idlelib idlelib/Icons \ - distutils distutils/command $(XMLLIBSUBDIRS) curses $(MACHDEPS) + distutils distutils/command distutils/tests $(XMLLIBSUBDIRS) \ + curses $(MACHDEPS) libinstall: $(BUILDPYTHON) $(srcdir)/Lib/$(PLATDIR) @for i in $(SCRIPTDIR) $(LIBDEST); \ do \ @@ -741,11 +742,11 @@ PYTHONPATH=$(DESTDIR)$(LIBDEST) $(RUNSHARED) \ ./$(BUILDPYTHON) -Wi -tt $(DESTDIR)$(LIBDEST)/compileall.py \ -d $(LIBDEST) -f \ - -x 'badsyntax|site-packages' $(DESTDIR)$(LIBDEST) + -x 'bad_coding|badsyntax|site-packages' $(DESTDIR)$(LIBDEST) PYTHONPATH=$(DESTDIR)$(LIBDEST) $(RUNSHARED) \ ./$(BUILDPYTHON) -Wi -tt -O $(DESTDIR)$(LIBDEST)/compileall.py \ -d $(LIBDEST) -f \ - -x 'badsyntax|site-packages' $(DESTDIR)$(LIBDEST) + -x 'bad_coding|badsyntax|site-packages' $(DESTDIR)$(LIBDEST) -PYTHONPATH=$(DESTDIR)$(LIBDEST) $(RUNSHARED) \ ./$(BUILDPYTHON) -Wi -t $(DESTDIR)$(LIBDEST)/compileall.py \ -d $(LIBDEST)/site-packages -f \ Index: README =================================================================== RCS file: /cvsroot/python/python/dist/src/README,v retrieving revision 1.148.2.2 retrieving revision 1.148.2.3 diff -u -d -r1.148.2.2 -r1.148.2.3 --- README 7 Jan 2005 06:56:36 -0000 1.148.2.2 +++ README 16 Oct 2005 05:23:54 -0000 1.148.2.3 @@ -1,7 +1,7 @@ -This is Python version 2.4 alpha 3 +This is Python version 2.5 alpha 0 ================================== -Copyright (c) 2001, 2002, 2003, 2004 Python Software 
Foundation. +Copyright (c) 2001, 2002, 2003, 2004, 2005 Python Software Foundation. All rights reserved. Copyright (c) 2000 BeOpen.com. @@ -35,6 +35,7 @@ See the file "Misc/NEWS". + If you don't read instructions ------------------------------ @@ -45,30 +46,29 @@ executable "./python"; to install in /usr/local, first do "su root" and then "make install". -The section `Build instructions' below is still recommended reading, -especially the part on customizing Modules/Setup. +The section `Build instructions' below is still recommended reading. What is Python anyway? ---------------------- -Python is an interpreted object-oriented programming language suitable -(amongst other uses) for distributed application development, -scripting, numeric computing and system testing. Python is often -compared to Tcl, Perl, Java, JavaScript, Visual Basic or Scheme. To -find out more about what Python can do for you, point your browser to -http://www.python.org/. +Python is an interpreted, interactive object-oriented programming +language suitable (amongst other uses) for distributed application +development, scripting, numeric computing and system testing. Python +is often compared to Tcl, Perl, Java, JavaScript, Visual Basic or +Scheme. To find out more about what Python can do for you, point your +browser to http://www.python.org/. How do I learn Python? ---------------------- The official tutorial is still a good place to start; see -http://www.python.org/doc/ for online and downloadable versions, as -well as a list of other introductions, and reference documentation. +http://docs.python.org/ for online and downloadable versions, as well +as a list of other introductions, and reference documentation. There's a quickly growing set of books on Python. See -http://www.python.org/cgi-bin/moinmoin/PythonBooks for a list. +http://wiki.python.org/moin/PythonBooks for a list. Documentation @@ -82,16 +82,17 @@ and functions! 
All documentation is also available online at the Python web site -(http://www.python.org/doc/, see below). It is available online for +(http://docs.python.org/, see below). It is available online for occasional reference, or can be downloaded in many formats for faster access. The documentation is available in HTML, PostScript, PDF, and LaTeX formats; the LaTeX version is primarily for documentation authors, translators, and people with special formatting requirements. -The best documentation for the new (in Python 2.2) type/class -unification features is Guido's tutorial introduction, at +Unfortunately, new-style classes (new in Python 2.2) have not yet been +integrated into Python's standard documention. A collection of +pointers to what has been written is at: - http://www.python.org/2.2.1/descrintro.html + http://www.python.org/doc/newstyle.html Web sites @@ -110,12 +111,12 @@ Read comp.lang.python, a high-volume discussion newsgroup about Python, or comp.lang.python.announce, a low-volume moderated newsgroup for Python-related announcements. These are also accessible as -mailing lists: see http://www.python.org/psa/MailingLists.html for an -overview of the many Python-related mailing lists. +mailing lists: see http://www.python.org/community/lists.html for an +overview of these and many other Python-related mailing lists. Archives are accessible via the Google Groups usenet archive; see http://groups.google.com/. The mailing lists are also archived, see -http://www.python.org/psa/MailingLists.html for details. +http://www.python.org/community/lists.html for details. Bug reports @@ -135,7 +136,7 @@ If you have a proposal to change Python, it's best to submit a Python Enhancement Proposal (PEP) first. All current PEPs, as well as guidelines for submitting a new PEP, are listed at -http://python.sourceforge.net/peps/. +http://www.python.org/peps/. Questions @@ -189,10 +190,9 @@ See also the platform specific notes in the next section. 
-If you run into other trouble, see section 3 of the FAQ -(http://www.python.org/cgi-bin/faqw.py or -http://www.python.org/doc/FAQ.html) for hints on what can go wrong, -and how to fix it. +If you run into other trouble, see the FAQ +(http://www.python.org/doc/faq) for hints on what can go wrong, and +how to fix it. If you rerun the configure script with different options, remove all object files by running "make clean" before rebuilding. Believe it or @@ -200,8 +200,7 @@ problems as well. Try it before sending in a bug report! If the configure script fails or doesn't seem to find things that -should be there, inspect the config.log file. When you fix a -configure problem, be sure to remove config.cache! +should be there, inspect the config.log file. If you get a warning for every file about the -Olimit option being no longer supported, you can ignore it. There's no foolproof way to know @@ -227,8 +226,10 @@ Unsupported systems ------------------- +XXX This section is out of date! + A number of features are not supported in Python 2.3 anymore. Some -support code is still present, but will be removed in Python 2.4. +support code is still present, but will be removed in Python 2.4. If you still need to use current Python versions on these systems, please send a message to python-dev at python.org indicating that you volunteer to support this system. @@ -263,12 +264,9 @@ bsddb185 bsddbmodule.c should work. (You may need to add -I, -L or -l flags to direct the - compiler and linker to your include files and libraries.) You can - then force it to be the version people import by adding - - import bsddb185 as bsddb + compiler and linker to your include files and libraries.) - in sitecustomize.py. +XXX I think this next bit is out of date: 64-bit platforms: The modules audioop, imageop and rgbimg don't work. The setup.py script disables them on 64-bit installations. 
@@ -295,8 +293,8 @@ When the dynamic loader complains about errors finding shared libraries, such as - ld.so.1: ./python: fatal: libstdc++.so.5: open failed: - No such file or directory + ld.so.1: ./python: fatal: libstdc++.so.5: open failed: + No such file or directory you need to first make sure that the library is available on your system. Then, you need to instruct the dynamic loader how @@ -314,13 +312,9 @@ solves the problem. This causes the popen2 test to fail; problem and solution reported by Pablo Bleyer. - Under Linux systems using GNU libc 2 (aka libc6), the crypt - module now needs the -lcrypt option. The setup.py script - takes care of this automatically. - Red Hat Linux: Red Hat 9 built Python2.2 in UCS-4 mode and hacked Tcl to support it. To compile Python2.3 with Tkinter, you will - need to pass --enable-unicode=ucs4 flag to ./configure. + need to pass --enable-unicode=ucs4 flag to ./configure. There's an executable /usr/bin/python which is Python 1.5.2 on most older Red Hat installations; several key Red Hat tools @@ -363,19 +357,44 @@ like "cc_r". For full C++ module support, set CC="xlC_r" (or CC="xlC" without thread support). +AIX 5.3: To build a 64-bit version with IBM's compiler, I used the + following: + + export PATH=/usr/bin:/usr/vacpp/bin + ./configure --with-gcc="xlc_r -q64" --with-cxx="xlC_r -q64" \ + --disable-ipv6 AR="ar -X64" + make + HP-UX: When using threading, you may have to add -D_REENTRANT to the OPT variable in the top-level Makefile; reported by Pat Knight, this seems to make a difference (at least for HP-UX 10.20) even though pyconfig.h defines it. This seems unnecessary when - using HP/UX 11 and later - threading seems to work "out of the + using HP/UX 11 and later - threading seems to work "out of the box". 
-HP-UX ia64: When building on the ia64 (Itanium) platform using HP's - compiler, some experience has shown that the compiler's - optimiser produces a completely broken version of python - (see http://www.python.org/sf/814976). To work around this, +HP-UX ia64: When building on the ia64 (Itanium) platform using HP's + compiler, some experience has shown that the compiler's + optimiser produces a completely broken version of python + (see http://www.python.org/sf/814976). To work around this, edit the Makefile and remove -O from the OPT line. + To build a 64-bit executable on an Itanium 2 system using HP's + compiler, use these environment variables: + + CC=cc + CXX=aCC + BASECFLAGS="+DD64" + LDFLAGS="+DD64 -lxnet" + + and call configure as: + + ./configure --without-gcc + + then *unset* the environment variables again before running + make. (At least one of these flags causes the build to fail + if it remains set.) You still have to edit the Makefile and + remove -O from the OPT line. + HP PA-RISC 2.0: A recent bug report (http://www.python.org/sf/546117) suggests that the C compiler in this 64-bit system has bugs in the optimizer that break Python. Compiling without @@ -541,7 +560,7 @@ do "sudo make install" which installs everything as superuser, as this may later cause problems when installing distutils-based additions. - + Some people have reported problems building Python after using "fink" to install additional unix software. Disabling fink (remove all references to /sw from your .profile or .login) should solve this. @@ -552,7 +571,7 @@ /Library/Frameworks). A framework install is probably needed if you want to use any Aqua-based GUI toolkit (whether Tkinter, wxPython, Carbon, Cocoa or anything else). - + See Mac/OSX/README for more information on framework builds. 
Cygwin: With recent (relative to the time of writing, 2001-12-19) @@ -767,9 +786,9 @@ --------------------------- Starting with Python 2.3, the majority of the interpreter can be built -into a shared library, which can then be used by the interpreter +into a shared library, which can then be used by the interpreter executable, and by applications embedding Python. To enable this feature, -configure with --enable-shared. +configure with --enable-shared. If you enable this feature, the same object files will be used to create a static library. In particular, the static library will contain object @@ -833,6 +852,9 @@ Additional debugging code to help debug memory management problems can be enabled by using the --with-pydebug option to the configure script. +For flags that change binary compatibility, use the EXTRA_CFLAGS +variable. + Profiling --------- @@ -1018,7 +1040,7 @@ --with-pydebug: Enable additional debugging code to help track down memory management problems. This allows printing a list of all live objects when the interpreter terminates. - + --with(out)-universal-newlines: enable reading of text files with foreign newline convention (default: enabled). In other words, any of \r, \n or \r\n is acceptable as end-of-line character. @@ -1028,6 +1050,7 @@ --with-tsc: Profile using the Pentium timestamping counter (TSC). + Building for multiple architectures (using the VPATH feature) ------------------------------------------------------------- @@ -1130,9 +1153,7 @@ For more Tkinter information, see the Tkinter Resource page: http://www.python.org/topics/tkinter/ -There are demos in the Demo/tkinter directory, in the subdirectories -guido, matt and www (the matt and guido subdirectories have been -overhauled to use more recent Tkinter coding conventions). +There are demos in the Demo/tkinter directory. 
Note that there's a Python module called "Tkinter" (capital T) which lives in Lib/lib-tk/Tkinter.py, and a C module called "_tkinter" Index: configure =================================================================== RCS file: /cvsroot/python/python/dist/src/configure,v retrieving revision 1.317.2.2 retrieving revision 1.317.2.3 diff -u -d -r1.317.2.2 -r1.317.2.3 --- configure 7 Jan 2005 06:56:36 -0000 1.317.2.2 +++ configure 16 Oct 2005 05:23:54 -0000 1.317.2.3 @@ -1,8 +1,10 @@ #! /bin/sh -# From configure.in Revision: 1.477 . +# From configure.in Revision: 1.486 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59 for python 2.5. # +# Report bugs to . +# # Copyright (C) 2003 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. @@ -270,7 +272,7 @@ PACKAGE_TARNAME='python' PACKAGE_VERSION='2.5' PACKAGE_STRING='python 2.5' -PACKAGE_BUGREPORT='' +PACKAGE_BUGREPORT='http://www.python.org/python-bugs' ac_unique_file="Include/object.h" # Factoring default headers for most tests. 
@@ -310,7 +312,7 @@ # include #endif" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG HAVE_GETHOSTBYNAME_R_5_ARG HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION SOVERSION CONFIG_ARGS PYTHONFRAMEWORK PYTHONFRAMEWORKDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLDIR MACHDEP SGI_ABI EXTRAPLATDIR EXTRAMACHDEPPATH CONFIGURE_MACOSX_DEPLOYMENT_TARGET CXX MAINOBJ EXEEXT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT CPP EGREP BUILDEXEEXT LIBRARY LDLIBRARY DLLLIBRARY BLDLIBRARY LDLIBRARYDIR INSTSONAME RUNSHARED LINKCC RANLIB ac_ct_RANLIB AR INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN OPT BASECFLAGS 
OTHER_LIBTOOL_OPT LIBTOOL_CRUFT SO LDSHARED BLDSHARED CCSHARED LINKFORSHARED CFLAGSFORSHARED SHLIBS USE_SIGNAL_MODULE SIGNAL_OBJS USE_THREAD_MODULE LDLAST THREADOBJ DLINCLDIR DYNLOADFILE MACHDEP_OBJS TRUE LIBOBJS HAVE_GETHOSTBYNAME_R_6_ARG HAVE_GETHOSTBYNAME_R_5_ARG HAVE_GETHOSTBYNAME_R_3_ARG HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. @@ -888,6 +890,7 @@ Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. +Report bugs to . _ACEOF fi @@ -1508,6 +1511,12 @@ define_xopen_source=no fi ;; + # On Mac OS X 10.4, defining _POSIX_C_SOURCE or _XOPEN_SOURCE + # disables platform specific features beyond repair. + Darwin/8.*) + define_xopen_source=no + ;; + esac if test $define_xopen_source = yes @@ -1542,6 +1551,7 @@ _ACEOF + cat >>confdefs.h <<\_ACEOF #define _POSIX_C_SOURCE 200112L _ACEOF @@ -3255,9 +3265,11 @@ if test -z "$CXX"; then LINKCC="\$(PURIFY) \$(CC)" else - echo 'void foo();int main(){foo();}void foo(){}' > conftest.$ac_ext - $CXX -c conftest.$ac_ext 2>&5 - if $CC -o conftest$ac_exeext conftest.$ac_objext 2>&5 \ + echo 'extern "C" void foo();int main(){foo();}' > conftest_a.cc + $CXX -c conftest_a.cc # 2>&5 + echo 'void foo(){}' > conftest_b.$ac_ext + $CC -c conftest_b.$ac_ext # 2>&5 + if $CC -o conftest$ac_exeext conftest_a.$ac_objext conftest_b.$ac_objext 2>&5 \ && test -s conftest$ac_exeext && ./conftest$ac_exeext then LINKCC="\$(PURIFY) \$(CC)" @@ -4538,7 +4550,8 @@ -for ac_header in curses.h dlfcn.h fcntl.h grp.h langinfo.h \ + +for ac_header in curses.h dlfcn.h fcntl.h grp.h shadow.h langinfo.h \ libintl.h ncurses.h poll.h pthread.h \ stropts.h termios.h thread.h \ unistd.h utime.h \ @@ -4668,9 +4681,9 @@ echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## 
Report this to the python lists. ## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -5201,9 +5214,9 @@ echo "$as_me: WARNING: sys/mkdev.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to the python lists. ## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -5349,9 +5362,9 @@ echo "$as_me: WARNING: sys/sysmacros.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to the python lists. ## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -10193,13 +10206,29 @@ case $ac_sys_system/$ac_sys_release in - Darwin/1.3*) - LIBTOOL_CRUFT="-lcc_dynamic -arch_only ppc" + Darwin/[01234567].*) + OTHER_LIBTOOL_OPT="-prebind -seg1addr 0x10000000" + ;; + Darwin/*) + OTHER_LIBTOOL_OPT="" + ;; +esac + + +case $ac_sys_system/$ac_sys_release in + Darwin/[01234567].*) + LIBTOOL_CRUFT="-framework System -lcc_dynamic -arch_only ppc" LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -install_name $(PYTHONFRAMEWORKINSTALLDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -compatibility_version $(VERSION) -current_version $(VERSION)';; Darwin/*) - LIBTOOL_CRUFT="-lcc_dynamic -arch_only ppc" - LIBTOOL_CRUFT="$LIBTOOL_CRUFT" + gcc_version=`gcc -v 2>&1 | grep version | cut -d\ -f3` + if test ${gcc_version} '<' 4.0 + then + 
LIBTOOL_CRUFT="-lcc_dynamic" + else + LIBTOOL_CRUFT="" + fi + LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -lSystem -lSystemStubs -arch_only ppc' LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -install_name $(PYTHONFRAMEWORKINSTALLDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -compatibility_version $(VERSION) -current_version $(VERSION)';; esac @@ -10302,8 +10331,8 @@ LDSHARED='$(CC) $(LDFLAGS) -bundle' if test "$enable_framework" ; then # Link against the framework. All externals should be defined. - BLDSHARED="$LDSHARED "'-Wl,-F. -framework $(PYTHONFRAMEWORK)' - LDSHARED="$LDSHARED "'-Wl,-F$(PYTHONFRAMEWORKPREFIX) -framework $(PYTHONFRAMEWORK)' + BLDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' + LDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKPREFIX)/$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' else # No framework. Ignore undefined symbols, assuming they come from Python LDSHARED="$LDSHARED -undefined suppress" @@ -10312,8 +10341,8 @@ LDSHARED='$(CC) $(LDFLAGS) -bundle' if test "$enable_framework" ; then # Link against the framework. All externals should be defined. - BLDSHARED="$LDSHARED "'-Wl,-F. -framework $(PYTHONFRAMEWORK)' - LDSHARED="$LDSHARED "'-Wl,-F$(PYTHONFRAMEWORKPREFIX) -framework $(PYTHONFRAMEWORK)' + BLDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' + LDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKPREFIX)/$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' else # No framework, use the Python app as bundle-loader BLDSHARED="$LDSHARED "'-bundle_loader $(BUILDPYTHON)' @@ -10323,17 +10352,17 @@ # Use -undefined dynamic_lookup whenever possible (10.3 and later). 
# This allows an extension to be used in any Python cur_target=`sw_vers -productVersion | sed 's/\(10\.[0-9]*\).*/\1/'` + CONFIGURE_MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET-${cur_target}} if test ${MACOSX_DEPLOYMENT_TARGET-${cur_target}} '>' 10.2 then LDSHARED='$(CC) $(LDFLAGS) -bundle -undefined dynamic_lookup' BLDSHARED="$LDSHARED" - CONFIGURE_MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET-${cur_target}} else LDSHARED='$(CC) $(LDFLAGS) -bundle' if test "$enable_framework" ; then # Link against the framework. All externals should be defined. - BLDSHARED="$LDSHARED "'-Wl,-F. -framework $(PYTHONFRAMEWORK)' - LDSHARED="$LDSHARED "'-Wl,-F$(PYTHONFRAMEWORKPREFIX) -framework $(PYTHONFRAMEWORK)' + BLDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' + LDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKPREFIX)/$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' else # No framework, use the Python app as bundle-loader BLDSHARED="$LDSHARED "'-bundle_loader $(BUILDPYTHON)' @@ -10429,7 +10458,7 @@ LINKFORSHARED="$extra_undefs" if test "$enable_framework" then - LINKFORSHARED="$LINKFORSHARED -Wl,-F. -framework "'$(PYTHONFRAMEWORK)' + LINKFORSHARED="$LINKFORSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' fi LINKFORSHARED="$LINKFORSHARED";; OpenUNIX*|UnixWare*) LINKFORSHARED="-Wl,-Bexport";; @@ -11430,9 +11459,9 @@ echo "$as_me: WARNING: cthreads.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to the python lists. 
## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -11585,9 +11614,9 @@ echo "$as_me: WARNING: mach/cthreads.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to the python lists. ## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -11917,9 +11946,9 @@ echo "$as_me: WARNING: atheos/threads.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to the python lists. ## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -12067,9 +12096,9 @@ echo "$as_me: WARNING: kernel/OS.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to the python lists. 
## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -13429,10 +13458,12 @@ + + for ac_func in alarm bind_textdomain_codeset chown clock confstr ctermid \ execv fork fpathconf ftime ftruncate \ gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \ - getpriority getpwent getsid getwd \ + getpriority getpwent getspnam getspent getsid getwd \ kill killpg lchown lstat mkfifo mknod mktime \ mremap nice pathconf pause plock poll pthread_init \ putenv readlink realpath \ @@ -16385,6 +16416,336 @@ fi +echo "$as_me:$LINENO: checking for struct stat.st_flags" >&5 +echo $ECHO_N "checking for struct stat.st_flags... $ECHO_C" >&6 +if test "${ac_cv_member_struct_stat_st_flags+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static struct stat ac_aggr; +if (ac_aggr.st_flags) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_stat_st_flags=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static struct stat ac_aggr; +if (sizeof ac_aggr.st_flags) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_stat_st_flags=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_member_struct_stat_st_flags=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_member_struct_stat_st_flags" >&5 +echo "${ECHO_T}$ac_cv_member_struct_stat_st_flags" >&6 +if test $ac_cv_member_struct_stat_st_flags = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_STAT_ST_FLAGS 1 +_ACEOF + + +fi + +echo "$as_me:$LINENO: checking for struct stat.st_gen" >&5 +echo $ECHO_N "checking for struct stat.st_gen... 
$ECHO_C" >&6 +if test "${ac_cv_member_struct_stat_st_gen+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static struct stat ac_aggr; +if (ac_aggr.st_gen) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_stat_st_gen=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static struct stat ac_aggr; +if (sizeof ac_aggr.st_gen) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_stat_st_gen=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_member_struct_stat_st_gen=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_member_struct_stat_st_gen" >&5 +echo "${ECHO_T}$ac_cv_member_struct_stat_st_gen" >&6 +if test $ac_cv_member_struct_stat_st_gen = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_STAT_ST_GEN 1 +_ACEOF + + +fi + +echo "$as_me:$LINENO: checking for struct stat.st_birthtime" >&5 +echo $ECHO_N "checking for struct stat.st_birthtime... $ECHO_C" >&6 +if test "${ac_cv_member_struct_stat_st_birthtime+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static struct stat ac_aggr; +if (ac_aggr.st_birthtime) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_stat_st_birthtime=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static struct stat ac_aggr; +if (sizeof ac_aggr.st_birthtime) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_stat_st_birthtime=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_member_struct_stat_st_birthtime=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_member_struct_stat_st_birthtime" >&5 +echo "${ECHO_T}$ac_cv_member_struct_stat_st_birthtime" >&6 +if test $ac_cv_member_struct_stat_st_birthtime = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_STAT_ST_BIRTHTIME 1 +_ACEOF + + +fi + echo "$as_me:$LINENO: checking for struct stat.st_blocks" >&5 echo $ECHO_N "checking for struct stat.st_blocks... $ECHO_C" >&6 if test "${ac_cv_member_struct_stat_st_blocks+set}" = set; then @@ -18251,9 +18612,9 @@ echo "$as_me: WARNING: wchar.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX -## --------------------------------- ## -## Report this to the python lists. ## -## --------------------------------- ## +## ------------------------------------------------ ## +## Report this to http://www.python.org/python-bugs ## +## ------------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 @@ -19748,8 +20109,201 @@ fi +# Before we can test tzset, we need to check if struct tm has a tm_zone +# (which is not required by ISO C or UNIX spec) and/or if we support +# tzname[] +echo "$as_me:$LINENO: checking for struct tm.tm_zone" >&5 +echo $ECHO_N "checking for struct tm.tm_zone... $ECHO_C" >&6 +if test "${ac_cv_member_struct_tm_tm_zone+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +#include <$ac_cv_struct_tm> -# tzset(3) exists and works like we expect it to + +int +main () +{ +static struct tm ac_aggr; +if (ac_aggr.tm_zone) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_tm_tm_zone=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include <$ac_cv_struct_tm> + + +int +main () +{ +static struct tm ac_aggr; +if (sizeof ac_aggr.tm_zone) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_member_struct_tm_tm_zone=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_member_struct_tm_tm_zone=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_zone" >&5 +echo "${ECHO_T}$ac_cv_member_struct_tm_tm_zone" >&6 +if test $ac_cv_member_struct_tm_tm_zone = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_TM_TM_ZONE 1 +_ACEOF + + +fi + +if test "$ac_cv_member_struct_tm_tm_zone" = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_TM_ZONE 1 +_ACEOF + +else + echo "$as_me:$LINENO: checking for tzname" >&5 +echo $ECHO_N "checking for tzname... $ECHO_C" >&6 +if test "${ac_cv_var_tzname+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#ifndef tzname /* For SGI. */ +extern char *tzname[]; /* RS6000 and others reject char **tzname. */ +#endif + +int +main () +{ +atoi(*tzname); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_var_tzname=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_var_tzname=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_var_tzname" >&5 +echo "${ECHO_T}$ac_cv_var_tzname" >&6 + if test $ac_cv_var_tzname = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_TZNAME 1 +_ACEOF + + fi +fi + + +# check tzset(3) exists and works like we expect it to echo "$as_me:$LINENO: checking for working tzset()" >&5 echo $ECHO_N "checking for working tzset()... $ECHO_C" >&6 if test "${ac_cv_working_tzset+set}" = set; then @@ -19769,37 +20323,62 @@ #include #include #include + +#if HAVE_TZNAME +extern char *tzname[]; +#endif + int main() { /* Note that we need to ensure that not only does tzset(3) do 'something' with localtime, but it works as documented in the library reference and as expected by the test suite. + This includes making sure that tzname is set properly if + tm->tm_zone does not exist since it is the alternative way + of getting timezone info. Red Hat 6.2 doesn't understand the southern hemisphere - after New Year's Day; it thinks swaps on that day. + after New Year's Day. 
*/ - time_t groundhogday = 1044144000; /* GMT-based; well, it's a colony */ + time_t groundhogday = 1044144000; /* GMT-based */ time_t midyear = groundhogday + (365 * 24 * 3600 / 2); putenv("TZ=UTC+0"); tzset(); if (localtime(&groundhogday)->tm_hour != 0) exit(1); +#if HAVE_TZNAME + /* For UTC, tzname[1] is sometimes "", sometimes " " */ + if (strcmp(tzname[0], "UTC") || + (tzname[1][0] != 0 && tzname[1][0] != ' ')) + exit(1); +#endif putenv("TZ=EST+5EDT,M4.1.0,M10.5.0"); tzset(); if (localtime(&groundhogday)->tm_hour != 19) exit(1); +#if HAVE_TZNAME + if (strcmp(tzname[0], "EST") || strcmp(tzname[1], "EDT")) + exit(1); +#endif putenv("TZ=AEST-10AEDT-11,M10.5.0,M3.5.0"); tzset(); if (localtime(&groundhogday)->tm_hour != 11) exit(1); +#if HAVE_TZNAME + if (strcmp(tzname[0], "AEST") || strcmp(tzname[1], "AEDT")) + exit(1); +#endif + +#if HAVE_STRUCT_TM_TM_ZONE if (strcmp(localtime(&groundhogday)->tm_zone, "AEDT")) exit(1); if (strcmp(localtime(&midyear)->tm_zone, "AEST")) exit(1); +#endif exit(0); } @@ -19907,6 +20486,73 @@ fi +# Look for BSD style subsecond timestamps in struct stat +echo "$as_me:$LINENO: checking for tv_nsec2 in struct stat" >&5 +echo $ECHO_N "checking for tv_nsec2 in struct stat... $ECHO_C" >&6 +if test "${ac_cv_stat_tv_nsec2+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ + +struct stat st; +st.st_mtimespec.tv_nsec = 1; + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_stat_tv_nsec2=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_stat_tv_nsec2=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +echo "$as_me:$LINENO: result: $ac_cv_stat_tv_nsec2" >&5 +echo "${ECHO_T}$ac_cv_stat_tv_nsec2" >&6 +if test "$ac_cv_stat_tv_nsec2" = yes +then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STAT_TV_NSEC2 1 +_ACEOF + +fi + # On HP/UX 11.0, mvwdelch is a block with a return statement echo "$as_me:$LINENO: checking whether mvwdelch is an expression" >&5 echo $ECHO_N "checking whether mvwdelch is an expression... $ECHO_C" >&6 @@ -20827,6 +21473,7 @@ s, at LN@,$LN,;t t s, at OPT@,$OPT,;t t s, at BASECFLAGS@,$BASECFLAGS,;t t +s, at OTHER_LIBTOOL_OPT@,$OTHER_LIBTOOL_OPT,;t t s, at LIBTOOL_CRUFT@,$LIBTOOL_CRUFT,;t t s, at SO@,$SO,;t t s, at LDSHARED@,$LDSHARED,;t t Index: configure.in =================================================================== RCS file: /cvsroot/python/python/dist/src/configure.in,v retrieving revision 1.327.2.2 retrieving revision 1.327.2.3 diff -u -d -r1.327.2.2 -r1.327.2.3 --- configure.in 7 Jan 2005 06:56:43 -0000 1.327.2.2 +++ configure.in 16 Oct 2005 05:23:56 -0000 1.327.2.3 @@ -5,7 +5,7 @@ AC_REVISION($Revision$) AC_PREREQ(2.53) -AC_INIT(python, PYTHON_VERSION) +AC_INIT(python, PYTHON_VERSION, http://www.python.org/python-bugs) AC_CONFIG_SRCDIR([Include/object.h]) AC_CONFIG_HEADER(pyconfig.h) @@ -171,6 +171,12 @@ define_xopen_source=no fi ;; + # On Mac OS X 10.4, defining _POSIX_C_SOURCE or _XOPEN_SOURCE + # disables platform specific features beyond repair. 
+ Darwin/8.*) + define_xopen_source=no + ;; + esac if test $define_xopen_source = yes @@ -194,8 +200,9 @@ # cases for HP-UX, we define it globally. AC_DEFINE(_XOPEN_SOURCE_EXTENDED, 1, Define to activate Unix95-and-earlier features) - AC_DEFINE(_POSIX_C_SOURCE, 200112L, Define to activate features from IEEE Stds 1003.1-2001) + AC_DEFINE(_POSIX_C_SOURCE, 200112L, Define to activate features from IEEE Stds 1003.1-2001) + fi # @@ -464,9 +471,11 @@ if test -z "$CXX"; then LINKCC="\$(PURIFY) \$(CC)" else - echo 'void foo();int main(){foo();}void foo(){}' > conftest.$ac_ext - $CXX -c conftest.$ac_ext 2>&5 - if $CC -o conftest$ac_exeext conftest.$ac_objext 2>&5 \ + echo 'extern "C" void foo();int main(){foo();}' > conftest_a.cc + $CXX -c conftest_a.cc # 2>&5 + echo 'void foo(){}' > conftest_b.$ac_ext + $CC -c conftest_b.$ac_ext # 2>&5 + if $CC -o conftest$ac_exeext conftest_a.$ac_objext conftest_b.$ac_objext 2>&5 \ && test -s conftest$ac_exeext && ./conftest$ac_exeext then LINKCC="\$(PURIFY) \$(CC)" @@ -960,7 +969,7 @@ # checks for header files AC_HEADER_STDC -AC_CHECK_HEADERS(curses.h dlfcn.h fcntl.h grp.h langinfo.h \ +AC_CHECK_HEADERS(curses.h dlfcn.h fcntl.h grp.h shadow.h langinfo.h \ libintl.h ncurses.h poll.h pthread.h \ stropts.h termios.h thread.h \ unistd.h utime.h \ @@ -1205,15 +1214,31 @@ esac AC_MSG_RESULT($enable_toolbox_glue) +AC_SUBST(OTHER_LIBTOOL_OPT) +case $ac_sys_system/$ac_sys_release in + Darwin/@<:@01234567@:>@.*) + OTHER_LIBTOOL_OPT="-prebind -seg1addr 0x10000000" + ;; + Darwin/*) + OTHER_LIBTOOL_OPT="" + ;; +esac + AC_SUBST(LIBTOOL_CRUFT) case $ac_sys_system/$ac_sys_release in - Darwin/1.3*) - LIBTOOL_CRUFT="-lcc_dynamic -arch_only ppc" + Darwin/@<:@01234567@:>@.*) + LIBTOOL_CRUFT="-framework System -lcc_dynamic -arch_only ppc" LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -install_name $(PYTHONFRAMEWORKINSTALLDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -compatibility_version $(VERSION) -current_version $(VERSION)';; Darwin/*) - 
LIBTOOL_CRUFT="-lcc_dynamic -arch_only ppc" - LIBTOOL_CRUFT="$LIBTOOL_CRUFT" + gcc_version=`gcc -v 2>&1 | grep version | cut -d\ -f3` + if test ${gcc_version} '<' 4.0 + then + LIBTOOL_CRUFT="-lcc_dynamic" + else + LIBTOOL_CRUFT="" + fi + LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -lSystem -lSystemStubs -arch_only ppc' LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -install_name $(PYTHONFRAMEWORKINSTALLDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' LIBTOOL_CRUFT=$LIBTOOL_CRUFT' -compatibility_version $(VERSION) -current_version $(VERSION)';; esac @@ -1304,8 +1329,8 @@ LDSHARED='$(CC) $(LDFLAGS) -bundle' if test "$enable_framework" ; then # Link against the framework. All externals should be defined. - BLDSHARED="$LDSHARED "'-Wl,-F. -framework $(PYTHONFRAMEWORK)' - LDSHARED="$LDSHARED "'-Wl,-F$(PYTHONFRAMEWORKPREFIX) -framework $(PYTHONFRAMEWORK)' + BLDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' + LDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKPREFIX)/$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' else # No framework. Ignore undefined symbols, assuming they come from Python LDSHARED="$LDSHARED -undefined suppress" @@ -1314,8 +1339,8 @@ LDSHARED='$(CC) $(LDFLAGS) -bundle' if test "$enable_framework" ; then # Link against the framework. All externals should be defined. - BLDSHARED="$LDSHARED "'-Wl,-F. -framework $(PYTHONFRAMEWORK)' - LDSHARED="$LDSHARED "'-Wl,-F$(PYTHONFRAMEWORKPREFIX) -framework $(PYTHONFRAMEWORK)' + BLDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' + LDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKPREFIX)/$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' else # No framework, use the Python app as bundle-loader BLDSHARED="$LDSHARED "'-bundle_loader $(BUILDPYTHON)' @@ -1325,17 +1350,17 @@ # Use -undefined dynamic_lookup whenever possible (10.3 and later). 
# This allows an extension to be used in any Python cur_target=`sw_vers -productVersion | sed 's/\(10\.[[0-9]]*\).*/\1/'` + CONFIGURE_MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET-${cur_target}} if test ${MACOSX_DEPLOYMENT_TARGET-${cur_target}} '>' 10.2 then LDSHARED='$(CC) $(LDFLAGS) -bundle -undefined dynamic_lookup' BLDSHARED="$LDSHARED" - CONFIGURE_MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET-${cur_target}} else LDSHARED='$(CC) $(LDFLAGS) -bundle' if test "$enable_framework" ; then # Link against the framework. All externals should be defined. - BLDSHARED="$LDSHARED "'-Wl,-F. -framework $(PYTHONFRAMEWORK)' - LDSHARED="$LDSHARED "'-Wl,-F$(PYTHONFRAMEWORKPREFIX) -framework $(PYTHONFRAMEWORK)' + BLDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' + LDSHARED="$LDSHARED "'$(PYTHONFRAMEWORKPREFIX)/$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' else # No framework, use the Python app as bundle-loader BLDSHARED="$LDSHARED "'-bundle_loader $(BUILDPYTHON)' @@ -1427,7 +1452,7 @@ LINKFORSHARED="$extra_undefs" if test "$enable_framework" then - LINKFORSHARED="$LINKFORSHARED -Wl,-F. 
-framework "'$(PYTHONFRAMEWORK)' + LINKFORSHARED="$LINKFORSHARED "'$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' fi LINKFORSHARED="$LINKFORSHARED";; OpenUNIX*|UnixWare*) LINKFORSHARED="-Wl,-Bexport";; @@ -2076,7 +2101,7 @@ AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr ctermid \ execv fork fpathconf ftime ftruncate \ gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \ - getpriority getpwent getsid getwd \ + getpriority getpwent getspnam getspent getsid getwd \ kill killpg lchown lstat mkfifo mknod mktime \ mremap nice pathconf pause plock poll pthread_init \ putenv readlink realpath \ @@ -2398,6 +2423,9 @@ AC_STRUCT_TIMEZONE AC_CHECK_MEMBERS([struct stat.st_rdev]) AC_CHECK_MEMBERS([struct stat.st_blksize]) +AC_CHECK_MEMBERS([struct stat.st_flags]) +AC_CHECK_MEMBERS([struct stat.st_gen]) +AC_CHECK_MEMBERS([struct stat.st_birthtime]) AC_STRUCT_ST_BLOCKS AC_MSG_CHECKING(for time.h that defines altzone) @@ -2917,45 +2945,74 @@ [Define if poll() sets errno on invalid file descriptors.]) fi +# Before we can test tzset, we need to check if struct tm has a tm_zone +# (which is not required by ISO C or UNIX spec) and/or if we support +# tzname[] +AC_STRUCT_TIMEZONE -# tzset(3) exists and works like we expect it to +# check tzset(3) exists and works like we expect it to AC_MSG_CHECKING(for working tzset()) AC_CACHE_VAL(ac_cv_working_tzset, [ AC_TRY_RUN([ #include #include #include + +#if HAVE_TZNAME +extern char *tzname[]; +#endif + int main() { /* Note that we need to ensure that not only does tzset(3) do 'something' with localtime, but it works as documented in the library reference and as expected by the test suite. + This includes making sure that tzname is set properly if + tm->tm_zone does not exist since it is the alternative way + of getting timezone info. Red Hat 6.2 doesn't understand the southern hemisphere - after New Year's Day; it thinks swaps on that day. + after New Year's Day. 
*/ - time_t groundhogday = 1044144000; /* GMT-based; well, it's a colony */ + time_t groundhogday = 1044144000; /* GMT-based */ time_t midyear = groundhogday + (365 * 24 * 3600 / 2); putenv("TZ=UTC+0"); tzset(); if (localtime(&groundhogday)->tm_hour != 0) exit(1); +#if HAVE_TZNAME + /* For UTC, tzname[1] is sometimes "", sometimes " " */ + if (strcmp(tzname[0], "UTC") || + (tzname[1][0] != 0 && tzname[1][0] != ' ')) + exit(1); +#endif putenv("TZ=EST+5EDT,M4.1.0,M10.5.0"); tzset(); if (localtime(&groundhogday)->tm_hour != 19) exit(1); +#if HAVE_TZNAME + if (strcmp(tzname[0], "EST") || strcmp(tzname[1], "EDT")) + exit(1); +#endif putenv("TZ=AEST-10AEDT-11,M10.5.0,M3.5.0"); tzset(); if (localtime(&groundhogday)->tm_hour != 11) exit(1); +#if HAVE_TZNAME + if (strcmp(tzname[0], "AEST") || strcmp(tzname[1], "AEDT")) + exit(1); +#endif + +#if HAVE_STRUCT_TM_TM_ZONE if (strcmp(localtime(&groundhogday)->tm_zone, "AEDT")) exit(1); if (strcmp(localtime(&midyear)->tm_zone, "AEST")) exit(1); +#endif exit(0); } @@ -2987,6 +3044,23 @@ [Define if you have struct stat.st_mtim.tv_nsec]) fi +# Look for BSD style subsecond timestamps in struct stat +AC_MSG_CHECKING(for tv_nsec2 in struct stat) +AC_CACHE_VAL(ac_cv_stat_tv_nsec2, +AC_TRY_COMPILE([#include <sys/stat.h>], [ +struct stat st; +st.st_mtimespec.tv_nsec = 1; +], +ac_cv_stat_tv_nsec2=yes, +ac_cv_stat_tv_nsec2=no, +ac_cv_stat_tv_nsec2=no)) +AC_MSG_RESULT($ac_cv_stat_tv_nsec2) +if test "$ac_cv_stat_tv_nsec2" = yes +then + AC_DEFINE(HAVE_STAT_TV_NSEC2, 1, + [Define if you have struct stat.st_mtimensec]) +fi + # On HP/UX 11.0, mvwdelch is a block with a return statement AC_MSG_CHECKING(whether mvwdelch is an expression) AC_CACHE_VAL(ac_cv_mvwdelch_is_expression, Index: pyconfig.h.in =================================================================== RCS file: /cvsroot/python/python/dist/src/pyconfig.h.in,v retrieving revision 1.42.2.2 retrieving revision 1.42.2.3 diff -u -d -r1.42.2.2 -r1.42.2.3 --- pyconfig.h.in 7 Jan 2005 06:56:44 -0000 
1.42.2.2 +++ pyconfig.h.in 16 Oct 2005 05:23:56 -0000 1.42.2.3 @@ -215,6 +215,12 @@ /* Define to 1 if you have the `getsid' function. */ #undef HAVE_GETSID +/* Define to 1 if you have the `getspent' function. */ +#undef HAVE_GETSPENT + +/* Define to 1 if you have the `getspnam' function. */ +#undef HAVE_GETSPNAM + /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY @@ -419,6 +425,9 @@ /* Define to 1 if you have the `setvbuf' function. */ #undef HAVE_SETVBUF +/* Define to 1 if you have the <shadow.h> header file. */ +#undef HAVE_SHADOW_H + /* Define to 1 if you have the `sigaction' function. */ #undef HAVE_SIGACTION @@ -446,6 +455,9 @@ /* Define if you have struct stat.st_mtim.tv_nsec */ #undef HAVE_STAT_TV_NSEC +/* Define if you have struct stat.st_mtimensec */ +#undef HAVE_STAT_TV_NSEC2 + /* Define if your compiler supports variable length function prototypes (e.g. void fprintf(FILE *, char *, ...);) *and* <stdarg.h> */ #undef HAVE_STDARG_PROTOTYPES @@ -474,12 +486,21 @@ /* Define to 1 if you have the <stropts.h> header file. */ #undef HAVE_STROPTS_H +/* Define to 1 if `st_birthtime' is member of `struct stat'. */ +#undef HAVE_STRUCT_STAT_ST_BIRTHTIME + /* Define to 1 if `st_blksize' is member of `struct stat'. */ #undef HAVE_STRUCT_STAT_ST_BLKSIZE /* Define to 1 if `st_blocks' is member of `struct stat'. */ #undef HAVE_STRUCT_STAT_ST_BLOCKS +/* Define to 1 if `st_flags' is member of `struct stat'. */ +#undef HAVE_STRUCT_STAT_ST_FLAGS + +/* Define to 1 if `st_gen' is member of `struct stat'. */ +#undef HAVE_STRUCT_STAT_ST_GEN + /* Define to 1 if `st_rdev' is member of `struct stat'. 
*/ #undef HAVE_STRUCT_STAT_ST_RDEV Index: setup.py =================================================================== RCS file: /cvsroot/python/python/dist/src/setup.py,v retrieving revision 1.98.2.2 retrieving revision 1.98.2.3 diff -u -d -r1.98.2.2 -r1.98.2.3 --- setup.py 7 Jan 2005 06:56:44 -0000 1.98.2.2 +++ setup.py 16 Oct 2005 05:23:56 -0000 1.98.2.3 @@ -171,8 +171,8 @@ # unfortunately, distutils doesn't let us provide separate C and C++ # compilers if compiler is not None: - (ccshared,opt,base) = sysconfig.get_config_vars('CCSHARED','OPT','BASECFLAGS') - args['compiler_so'] = compiler + ' ' + opt + ' ' + ccshared + ' ' + base + (ccshared,cflags) = sysconfig.get_config_vars('CCSHARED','CFLAGS') + args['compiler_so'] = compiler + ' ' + ccshared + ' ' + cflags self.compiler.set_executables(**args) build_ext.build_extensions(self) @@ -295,6 +295,9 @@ inc_dirs = self.compiler.include_dirs + ['/usr/include'] exts = [] + config_h = sysconfig.get_config_h_filename() + config_h_vars = sysconfig.parse_config_h(open(config_h)) + platform = self.get_platform() (srcdir,) = sysconfig.get_config_vars('srcdir') @@ -355,6 +358,8 @@ exts.append( Extension("_heapq", ["_heapqmodule.c"]) ) # operator.add() and similar goodies exts.append( Extension('operator', ['operator.c']) ) + # functional + exts.append( Extension("functional", ["functionalmodule.c"]) ) # Python C API test module exts.append( Extension('_testcapi', ['_testcapimodule.c']) ) # static Unicode character database @@ -384,22 +389,17 @@ # fcntl(2) and ioctl(2) exts.append( Extension('fcntl', ['fcntlmodule.c']) ) if platform not in ['mac']: - # pwd(3) + # pwd(3) exts.append( Extension('pwd', ['pwdmodule.c']) ) # grp(3) exts.append( Extension('grp', ['grpmodule.c']) ) + # spwd, shadow passwords + if (config_h_vars.get('HAVE_GETSPNAM', False) or + config_h_vars.get('HAVE_GETSPENT', False)): + exts.append( Extension('spwd', ['spwdmodule.c']) ) # select(2); not on ancient System V exts.append( Extension('select', 
['selectmodule.c']) ) - # The md5 module implements the RSA Data Security, Inc. MD5 - # Message-Digest Algorithm, described in RFC 1321. The - # necessary files md5c.c and md5.h are included here. - exts.append( Extension('md5', ['md5module.c', 'md5c.c']) ) - - # The sha module implements the SHA checksum algorithm. - # (NIST's Secure Hash Algorithm.) - exts.append( Extension('sha', ['shamodule.c']) ) - # Helper module for various ascii-encoders exts.append( Extension('binascii', ['binascii.c']) ) @@ -474,10 +474,12 @@ exts.append( Extension('_socket', ['socketmodule.c'], depends = ['socketmodule.h']) ) # Detect SSL support for the socket module (via _ssl) - ssl_incs = find_file('openssl/ssl.h', inc_dirs, - ['/usr/local/ssl/include', + search_for_ssl_incs_in = [ + '/usr/local/ssl/include', '/usr/contrib/ssl/include/' ] + ssl_incs = find_file('openssl/ssl.h', inc_dirs, + search_for_ssl_incs_in ) if ssl_incs is not None: krb5_h = find_file('krb5.h', inc_dirs, @@ -497,6 +499,52 @@ libraries = ['ssl', 'crypto'], depends = ['socketmodule.h']), ) + # find out which version of OpenSSL we have + openssl_ver = 0 + openssl_ver_re = re.compile( + '^\s*#\s*define\s+OPENSSL_VERSION_NUMBER\s+(0x[0-9a-fA-F]+)' ) + for ssl_inc_dir in inc_dirs + search_for_ssl_incs_in: + name = os.path.join(ssl_inc_dir, 'openssl', 'opensslv.h') + if os.path.isfile(name): + try: + incfile = open(name, 'r') + for line in incfile: + m = openssl_ver_re.match(line) + if m: + openssl_ver = eval(m.group(1)) + break + except IOError: + pass + + # first version found is what we'll use (as the compiler should) + if openssl_ver: + break + + #print 'openssl_ver = 0x%08x' % openssl_ver + + if (ssl_incs is not None and + ssl_libs is not None and + openssl_ver >= 0x00907000): + # The _hashlib module wraps optimized implementations + # of hash functions from the OpenSSL library. 
+ exts.append( Extension('_hashlib', ['_hashopenssl.c'], + include_dirs = ssl_incs, + library_dirs = ssl_libs, + libraries = ['ssl', 'crypto']) ) + else: + # The _sha module implements the SHA1 hash algorithm. + exts.append( Extension('_sha', ['shamodule.c']) ) + # The _md5 module implements the RSA Data Security, Inc. MD5 + # Message-Digest Algorithm, described in RFC 1321. The + # necessary files md5c.c and md5.h are included here. + exts.append( Extension('_md5', ['md5module.c', 'md5c.c']) ) + + if (openssl_ver < 0x00908000): + # OpenSSL doesn't do these until 0.9.8 so we'll bring our own hash + exts.append( Extension('_sha256', ['sha256module.c']) ) + exts.append( Extension('_sha512', ['sha512module.c']) ) + + # Modules that provide persistent dictionary-like semantics. You will # probably want to arrange for at least one of them to be available on # your machine, though none are defined by default because of library @@ -592,7 +640,9 @@ # XXX should we -ever- look for a dbX name? Do any # systems really not name their library by version and # symlink to more general names? - for dblib in (('db-%d.%d' % db_ver), ('db%d' % db_ver[0])): + for dblib in (('db-%d.%d' % db_ver), + ('db%d%d' % db_ver), + ('db%d' % db_ver[0])): dblib_file = self.compiler.find_library_file( db_dirs_to_check + lib_dirs, dblib ) if dblib_file: @@ -777,8 +827,6 @@ ('BYTEORDER', xmlbo), ('XML_CONTEXT_BYTES','1024'), ] - config_h = sysconfig.get_config_h_filename() - config_h_vars = sysconfig.parse_config_h(open(config_h)) for feature_macro in ['HAVE_MEMMOVE', 'HAVE_BCOPY']: if config_h_vars.has_key(feature_macro): define_macros.append((feature_macro, '1')) @@ -793,11 +841,12 @@ )) # Hye-Shik Chang's CJKCodecs modules. 
- exts.append(Extension('_multibytecodec', - ['cjkcodecs/multibytecodec.c'])) - for loc in ('kr', 'jp', 'cn', 'tw', 'hk', 'iso2022'): - exts.append(Extension('_codecs_' + loc, - ['cjkcodecs/_codecs_%s.c' % loc])) + if have_unicode: + exts.append(Extension('_multibytecodec', + ['cjkcodecs/multibytecodec.c'])) + for loc in ('kr', 'jp', 'cn', 'tw', 'hk', 'iso2022'): + exts.append(Extension('_codecs_' + loc, + ['cjkcodecs/_codecs_%s.c' % loc])) # Dynamic loading module if sys.maxint == 0x7fffffff: @@ -811,7 +860,8 @@ # Linux-specific modules exts.append( Extension('linuxaudiodev', ['linuxaudiodev.c']) ) - if platform in ('linux2', 'freebsd4', 'freebsd5', 'freebsd6'): + if platform in ('linux2', 'freebsd4', 'freebsd5', 'freebsd6', + 'freebsd7'): exts.append( Extension('ossaudiodev', ['ossaudiodev.c']) ) if platform == 'sunos5': From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/compiler ast.py, 1.21.2.1, 1.21.2.2 transformer.py, 1.34.2.2, 1.34.2.3 Message-ID: <20051016052433.3C9101E4019@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/compiler In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/compiler Modified Files: Tag: ast-branch ast.py transformer.py Log Message: Merge head to branch (for the last time) Index: ast.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/compiler/ast.py,v retrieving revision 1.21.2.1 retrieving revision 1.21.2.2 diff -u -d -r1.21.2.1 -r1.21.2.2 --- ast.py 7 Jan 2005 06:58:13 -0000 1.21.2.1 +++ ast.py 16 Oct 2005 05:23:59 -0000 1.21.2.2 @@ -4,9 +4,9 @@ """ from consts import CO_VARARGS, CO_VARKEYWORDS -def flatten(list): +def flatten(seq): l = [] - for elt in list: + for elt in seq: t = type(elt) if t is tuple or t is list: for elt2 in flatten(elt): @@ -15,8 +15,8 @@ 
l.append(elt) return l -def flatten_nodes(list): - return [n for n in flatten(list) if isinstance(n, Node)] +def flatten_nodes(seq): + return [n for n in flatten(seq) if isinstance(n, Node)] nodes = {} Index: transformer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/compiler/transformer.py,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- transformer.py 7 Jan 2005 06:58:13 -0000 1.34.2.2 +++ transformer.py 16 Oct 2005 05:23:59 -0000 1.34.2.3 @@ -89,7 +89,7 @@ print nodes[kind], len(args), args raise else: - raise WalkerEror, "Can't find appropriate Node type: %s" % str(args) + raise WalkerError, "Can't find appropriate Node type: %s" % str(args) #return apply(ast.Node, args) class Transformer: @@ -165,7 +165,7 @@ if n == symbol.classdef: return self.classdef(node[1:]) - raise WalkerEror, ('unexpected node type', n) + raise WalkerError, ('unexpected node type', n) def single_input(self, node): ### do we want to do anything about being "interactive" ? 
@@ -280,12 +280,14 @@ return Lambda(names, defaults, flags, code, lineno=nodelist[1][2]) def classdef(self, nodelist): - # classdef: 'class' NAME ['(' testlist ')'] ':' suite + # classdef: 'class' NAME ['(' [testlist] ')'] ':' suite name = nodelist[1][1] doc = self.get_docstring(nodelist[-1]) if nodelist[2][0] == token.COLON: bases = [] + elif nodelist[3][0] == token.RPAR: + bases = [] else: bases = self.com_bases(nodelist[3]) @@ -314,31 +316,31 @@ return Stmt(stmts) def parameters(self, nodelist): - raise WalkerEror + raise WalkerError def varargslist(self, nodelist): - raise WalkerEror + raise WalkerError def fpdef(self, nodelist): - raise WalkerEror + raise WalkerError def fplist(self, nodelist): - raise WalkerEror + raise WalkerError def dotted_name(self, nodelist): - raise WalkerEror + raise WalkerError def comp_op(self, nodelist): - raise WalkerEror + raise WalkerError def trailer(self, nodelist): - raise WalkerEror + raise WalkerError def sliceop(self, nodelist): - raise WalkerEror + raise WalkerError def argument(self, nodelist): - raise WalkerEror + raise WalkerError # -------------------------------------------------------------- # @@ -401,7 +403,15 @@ return Return(self.com_node(nodelist[1]), lineno=nodelist[0][2]) def yield_stmt(self, nodelist): - return Yield(self.com_node(nodelist[1]), lineno=nodelist[0][2]) + expr = self.com_node(nodelist[0]) + return Discard(expr, lineno=expr.lineno) + + def yield_expr(self, nodelist): + if len(nodelist)>1: + value = nodelist[1] + else: + value = Const(None) + return Yield(self.com_node(value), lineno=nodelist[0][2]) def raise_stmt(self, nodelist): # raise: [test [',' test [',' test]]] @@ -1400,6 +1410,8 @@ if hasattr(symbol, 'yield_stmt'): _legal_node_types.append(symbol.yield_stmt) +if hasattr(symbol, 'yield_expr'): + _legal_node_types.append(symbol.yield_expr) _assign_types = [ symbol.test, From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net 
(jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/distutils/tests test_versionpredicate.py, NONE, 1.2.2.2 test_dist.py, 1.1.4.1, 1.1.4.2 Message-ID: <20051016052433.830EF1E4013@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/distutils/tests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/distutils/tests Modified Files: Tag: ast-branch test_dist.py Added Files: Tag: ast-branch test_versionpredicate.py Log Message: Merge head to branch (for the last time) --- NEW FILE: test_versionpredicate.py --- """Tests harness for distutils.versionpredicate. """ import distutils.versionpredicate import doctest def test_suite(): return doctest.DocTestSuite(distutils.versionpredicate) Index: test_dist.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/tests/test_dist.py,v retrieving revision 1.1.4.1 retrieving revision 1.1.4.2 diff -u -d -r1.1.4.1 -r1.1.4.2 --- test_dist.py 7 Jan 2005 06:58:20 -0000 1.1.4.1 +++ test_dist.py 16 Oct 2005 05:24:00 -0000 1.1.4.2 @@ -4,6 +4,7 @@ import distutils.dist import os import shutil +import StringIO import sys import tempfile import unittest @@ -96,5 +97,93 @@ os.unlink(TESTFN) +class MetadataTestCase(unittest.TestCase): + + def test_simple_metadata(self): + attrs = {"name": "package", + "version": "1.0"} + dist = distutils.dist.Distribution(attrs) + meta = self.format_metadata(dist) + self.assert_("Metadata-Version: 1.0" in meta) + self.assert_("provides:" not in meta.lower()) + self.assert_("requires:" not in meta.lower()) + self.assert_("obsoletes:" not in meta.lower()) + + def test_provides(self): + attrs = {"name": "package", + "version": "1.0", + "provides": ["package", "package.sub"]} + dist = distutils.dist.Distribution(attrs) + self.assertEqual(dist.metadata.get_provides(), + ["package", "package.sub"]) + self.assertEqual(dist.get_provides(), + ["package", 
"package.sub"]) + meta = self.format_metadata(dist) + self.assert_("Metadata-Version: 1.1" in meta) + self.assert_("requires:" not in meta.lower()) + self.assert_("obsoletes:" not in meta.lower()) + + def test_provides_illegal(self): + self.assertRaises(ValueError, + distutils.dist.Distribution, + {"name": "package", + "version": "1.0", + "provides": ["my.pkg (splat)"]}) + + def test_requires(self): + attrs = {"name": "package", + "version": "1.0", + "requires": ["other", "another (==1.0)"]} + dist = distutils.dist.Distribution(attrs) + self.assertEqual(dist.metadata.get_requires(), + ["other", "another (==1.0)"]) + self.assertEqual(dist.get_requires(), + ["other", "another (==1.0)"]) + meta = self.format_metadata(dist) + self.assert_("Metadata-Version: 1.1" in meta) + self.assert_("provides:" not in meta.lower()) + self.assert_("Requires: other" in meta) + self.assert_("Requires: another (==1.0)" in meta) + self.assert_("obsoletes:" not in meta.lower()) + + def test_requires_illegal(self): + self.assertRaises(ValueError, + distutils.dist.Distribution, + {"name": "package", + "version": "1.0", + "requires": ["my.pkg (splat)"]}) + + def test_obsoletes(self): + attrs = {"name": "package", + "version": "1.0", + "obsoletes": ["other", "another (<1.0)"]} + dist = distutils.dist.Distribution(attrs) + self.assertEqual(dist.metadata.get_obsoletes(), + ["other", "another (<1.0)"]) + self.assertEqual(dist.get_obsoletes(), + ["other", "another (<1.0)"]) + meta = self.format_metadata(dist) + self.assert_("Metadata-Version: 1.1" in meta) + self.assert_("provides:" not in meta.lower()) + self.assert_("requires:" not in meta.lower()) + self.assert_("Obsoletes: other" in meta) + self.assert_("Obsoletes: another (<1.0)" in meta) + + def test_obsoletes_illegal(self): + self.assertRaises(ValueError, + distutils.dist.Distribution, + {"name": "package", + "version": "1.0", + "obsoletes": ["my.pkg (splat)"]}) + + def format_metadata(self, dist): + sio = StringIO.StringIO() + 
dist.metadata.write_pkg_file(sio) + return sio.getvalue() + + def test_suite(): - return unittest.makeSuite(DistributionTestCase) + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(DistributionTestCase)) + suite.addTest(unittest.makeSuite(MetadataTestCase)) + return suite From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/lib-tk Tkinter.py, 1.162.2.2, 1.162.2.3 Message-ID: <20051016052433.A2E2F1E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/lib-tk In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/lib-tk Modified Files: Tag: ast-branch Tkinter.py Log Message: Merge head to branch (for the last time) Index: Tkinter.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/lib-tk/Tkinter.py,v retrieving revision 1.162.2.2 retrieving revision 1.162.2.3 diff -u -d -r1.162.2.2 -r1.162.2.3 --- Tkinter.py 7 Jan 2005 06:58:39 -0000 1.162.2.2 +++ Tkinter.py 16 Oct 2005 05:24:00 -0000 1.162.2.3 @@ -1599,8 +1599,15 @@ raise RuntimeError, \ "Tk 4.0 or higher is required; found Tk %s" \ % str(TkVersion) + # Create and register the tkerror and exit commands + # We need to inline parts of _register here, _ register + # would register differently-named commands. 
+ if self._tclCommands is None: + self._tclCommands = [] self.tk.createcommand('tkerror', _tkerror) self.tk.createcommand('exit', _exit) + self._tclCommands.append('tkerror') + self._tclCommands.append('exit') if _support_default_root and not _default_root: _default_root = self self.protocol("WM_DELETE_WINDOW", self.destroy) @@ -1645,12 +1652,6 @@ def __getattr__(self, attr): "Delegate attribute access to the interpreter object" return getattr(self.tk, attr) - def __hasattr__(self, attr): - "Delegate attribute access to the interpreter object" - return hasattr(self.tk, attr) - def __delattr__(self, attr): - "Delegate attribute access to the interpreter object" - return delattr(self.tk, attr) # Ideally, the classes Pack, Place and Grid disappear, the # pack/place/grid methods are defined on the Widget class, and From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings __init__.py, 1.8.2.2, 1.8.2.3 aliases.py, 1.13.2.2, 1.13.2.3 idna.py, 1.2.4.2, 1.2.4.3 utf_16.py, 1.3.2.2, 1.3.2.3 Message-ID: <20051016052433.A48961E400E@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/encodings Modified Files: Tag: ast-branch __init__.py aliases.py idna.py utf_16.py Log Message: Merge head to branch (for the last time) Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/__init__.py,v retrieving revision 1.8.2.2 retrieving revision 1.8.2.3 diff -u -d -r1.8.2.2 -r1.8.2.3 --- __init__.py 7 Jan 2005 06:58:21 -0000 1.8.2.2 +++ __init__.py 16 Oct 2005 05:24:00 -0000 1.8.2.3 @@ -27,7 +27,7 @@ """#" -import codecs, exceptions, types, aliases +import codecs, types, aliases _cache = {} _unknown = '--unknown--' @@ -40,8 +40,7 @@ ' ') _aliases 
= aliases.aliases -class CodecRegistryError(exceptions.LookupError, - exceptions.SystemError): +class CodecRegistryError(LookupError, SystemError): pass def normalize_encoding(encoding): Index: aliases.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/aliases.py,v retrieving revision 1.13.2.2 retrieving revision 1.13.2.3 diff -u -d -r1.13.2.2 -r1.13.2.3 --- aliases.py 7 Jan 2005 06:58:36 -0000 1.13.2.2 +++ aliases.py 16 Oct 2005 05:24:00 -0000 1.13.2.3 @@ -482,6 +482,7 @@ # utf_7 codec 'u7' : 'utf_7', 'utf7' : 'utf_7', + 'unicode_1_1_utf_7' : 'utf_7', # utf_8 codec 'u8' : 'utf_8', Index: idna.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/idna.py,v retrieving revision 1.2.4.2 retrieving revision 1.2.4.3 diff -u -d -r1.2.4.2 -r1.2.4.3 --- idna.py 7 Jan 2005 06:58:36 -0000 1.2.4.2 +++ idna.py 16 Oct 2005 05:24:00 -0000 1.2.4.3 @@ -149,6 +149,9 @@ # IDNA is quite clear that implementations must be strict raise UnicodeError, "unsupported error handling "+errors + if not input: + return "", 0 + result = [] labels = dots.split(input) if labels and len(labels[-1])==0: @@ -166,6 +169,9 @@ if errors != 'strict': raise UnicodeError, "Unsupported error handling "+errors + if not input: + return u"", 0 + # IDNA allows decoding to operate on Unicode strings, too. 
if isinstance(input, unicode): labels = dots.split(input) Index: utf_16.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/utf_16.py,v retrieving revision 1.3.2.2 retrieving revision 1.3.2.3 diff -u -d -r1.3.2.2 -r1.3.2.3 --- utf_16.py 7 Jan 2005 06:58:36 -0000 1.3.2.2 +++ utf_16.py 16 Oct 2005 05:24:00 -0000 1.3.2.3 @@ -31,6 +31,13 @@ class StreamReader(codecs.StreamReader): + def reset(self): + codecs.StreamReader.reset(self) + try: + del self.decode + except AttributeError: + pass + def decode(self, input, errors='strict'): (object, consumed, byteorder) = \ codecs.utf_16_ex_decode(input, errors, 0, False) From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/idlelib CodeContext.py, 1.4.6.1, 1.4.6.2 Message-ID: <20051016052433.C67B61E4011@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/idlelib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/idlelib Modified Files: Tag: ast-branch CodeContext.py Log Message: Merge head to branch (for the last time) Index: CodeContext.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/idlelib/CodeContext.py,v retrieving revision 1.4.6.1 retrieving revision 1.4.6.2 diff -u -d -r1.4.6.1 -r1.4.6.2 --- CodeContext.py 7 Jan 2005 06:58:37 -0000 1.4.6.1 +++ CodeContext.py 16 Oct 2005 05:24:00 -0000 1.4.6.2 @@ -1,33 +1,33 @@ -"""CodeContext - Display the block context of code at top of edit window +"""CodeContext - Extension to display the block context above the edit window -Once code has scrolled off the top of the screen, it can be difficult -to determine which block you are in. This extension implements a pane -at the top of each IDLE edit window which provides block structure -hints. 
These hints are the lines which contain the block opening -keywords, e.g. 'if', for the enclosing block. The number of hint lines -is determined by the numlines variable in the CodeContext section of -config-extensions.def. Lines which do not open blocks are not shown in -the context hints pane. +Once code has scrolled off the top of a window, it can be difficult to +determine which block you are in. This extension implements a pane at the top +of each IDLE edit window which provides block structure hints. These hints are +the lines which contain the block opening keywords, e.g. 'if', for the +enclosing block. The number of hint lines is determined by the numlines +variable in the CodeContext section of config-extensions.def. Lines which do +not open blocks are not shown in the context hints pane. """ import Tkinter from configHandler import idleConf from sets import Set import re +from sys import maxint as INFINITY BLOCKOPENERS = Set(["class", "def", "elif", "else", "except", "finally", "for", "if", "try", "while"]) -INFINITY = 1 << 30 UPDATEINTERVAL = 100 # millisec FONTUPDATEINTERVAL = 1000 # millisec -getspacesfirstword = lambda s, c=re.compile(r"^(\s*)(\w*)"): c.match(s).groups() +getspacesfirstword =\ + lambda s, c=re.compile(r"^(\s*)(\w*)"): c.match(s).groups() class CodeContext: menudefs = [('options', [('!Code Conte_xt', '<>')])] - numlines = idleConf.GetOption("extensions", "CodeContext", - "numlines", type="int", default=3) + context_depth = idleConf.GetOption("extensions", "CodeContext", + "numlines", type="int", default=3) bgcolor = idleConf.GetOption("extensions", "CodeContext", "bgcolor", type="str", default="LightGray") fgcolor = idleConf.GetOption("extensions", "CodeContext", @@ -37,9 +37,13 @@ self.text = editwin.text self.textfont = self.text["font"] self.label = None - # Dummy line, which starts the "block" of the whole document: - self.info = list(self.interesting_lines(1)) - self.lastfirstline = 1 + # self.info is a list of (line number, 
indent level, line text, block + # keyword) tuples providing the block structure associated with + # self.topvisible (the linenumber of the line displayed at the top of + # the edit window). self.info[0] is initialized as a 'dummy' line which + # starts the toplevel 'block' of the module. + self.info = [(0, -1, "", False)] + self.topvisible = 1 visible = idleConf.GetOption("extensions", "CodeContext", "visible", type="bool", default=False) if visible: @@ -51,18 +55,25 @@ def toggle_code_context_event(self, event=None): if not self.label: - self.label = Tkinter.Label(self.editwin.top, - text="\n" * (self.numlines - 1), + self.pad_frame = Tkinter.Frame(self.editwin.top, + bg=self.bgcolor, border=2, + relief="sunken") + self.label = Tkinter.Label(self.pad_frame, + text="\n" * (self.context_depth - 1), anchor="w", justify="left", font=self.textfont, bg=self.bgcolor, fg=self.fgcolor, - relief="sunken", + border=0, width=1, # Don't request more than we get ) - self.label.pack(side="top", fill="x", expand=0, - after=self.editwin.status_bar) + self.label.pack(side="top", fill="x", expand=True, + padx=4, pady=0) + self.pad_frame.pack(side="top", fill="x", expand=False, + padx=0, pady=0, + after=self.editwin.status_bar) else: self.label.destroy() + self.pad_frame.destroy() self.label = None idleConf.SetOption("extensions", "CodeContext", "visible", str(self.label is not None)) @@ -73,14 +84,8 @@ If the line does not start a block, the keyword value is False. The indentation of empty lines (or comment lines) is INFINITY. - There is a dummy block start, with indentation -1 and text "". - - Return the indent level, text (including leading whitespace), - and the block opening keyword. 
""" - if linenum == 0: - return -1, "", True text = self.text.get("%d.0" % linenum, "%d.end" % linenum) spaces, firstword = getspacesfirstword(text) opener = firstword in BLOCKOPENERS and firstword @@ -90,45 +95,69 @@ indent = len(spaces) return indent, text, opener - def interesting_lines(self, firstline): - """Generator which yields context lines, starting at firstline.""" + def get_context(self, new_topvisible, stopline=1, stopindent=0): + """Get context lines, starting at new_topvisible and working backwards. + + Stop when stopline or stopindent is reached. Return a tuple of context + data and the indent level at the top of the region inspected. + + """ + assert stopline > 0 + lines = [] # The indentation level we are currently in: lastindent = INFINITY # For a line to be interesting, it must begin with a block opening # keyword, and have less indentation than lastindent. - for line_index in xrange(firstline, -1, -1): - indent, text, opener = self.get_line_info(line_index) + for linenum in xrange(new_topvisible, stopline-1, -1): + indent, text, opener = self.get_line_info(linenum) if indent < lastindent: lastindent = indent if opener in ("else", "elif"): # We also show the if statement lastindent += 1 - if opener and line_index < firstline: - yield line_index, text + if opener and linenum < new_topvisible and indent >= stopindent: + lines.append((linenum, indent, text, opener)) + if lastindent <= stopindent: + break + lines.reverse() + return lines, lastindent - def update_label(self): - firstline = int(self.text.index("@0,0").split('.')[0]) - if self.lastfirstline == firstline: + def update_code_context(self): + """Update context information and lines visible in the context pane. 
+ + """ + new_topvisible = int(self.text.index("@0,0").split('.')[0]) + if self.topvisible == new_topvisible: # haven't scrolled return - self.lastfirstline = firstline - tmpstack = [] - for line_index, text in self.interesting_lines(firstline): - # Remove irrelevant self.info items, and when we reach a relevant - # item (which must happen because of the dummy element), break. - while self.info[-1][0] > line_index: + if self.topvisible < new_topvisible: # scroll down + lines, lastindent = self.get_context(new_topvisible, + self.topvisible) + # retain only context info applicable to the region + # between topvisible and new_topvisible: + while self.info[-1][1] >= lastindent: del self.info[-1] - if self.info[-1][0] == line_index: - break - tmpstack.append((line_index, text)) - while tmpstack: - self.info.append(tmpstack.pop()) - lines = [""] * max(0, self.numlines - len(self.info)) + \ - [x[1] for x in self.info[-self.numlines:]] - self.label["text"] = '\n'.join(lines) + elif self.topvisible > new_topvisible: # scroll up + stopindent = self.info[-1][1] + 1 + # retain only context info associated + # with lines above new_topvisible: + while self.info[-1][0] >= new_topvisible: + stopindent = self.info[-1][1] + del self.info[-1] + lines, lastindent = self.get_context(new_topvisible, + self.info[-1][0]+1, + stopindent) + self.info.extend(lines) + self.topvisible = new_topvisible + + # empty lines in context pane: + context_strings = [""] * max(0, self.context_depth - len(self.info)) + # followed by the context hint lines: + context_strings += [x[2] for x in self.info[-self.context_depth:]] + self.label["text"] = '\n'.join(context_strings) def timer_event(self): if self.label: - self.update_label() + self.update_code_context() self.text.after(UPDATEINTERVAL, self.timer_event) def font_timer_event(self): From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 
(CEST) Subject: [Python-checkins] python/dist/src/Lib/plat-riscos riscospath.py, 1.8.6.2, 1.8.6.3 Message-ID: <20051016052433.E34B51E4017@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/plat-riscos In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/plat-riscos Modified Files: Tag: ast-branch riscospath.py Log Message: Merge head to branch (for the last time) Index: riscospath.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/plat-riscos/riscospath.py,v retrieving revision 1.8.6.2 retrieving revision 1.8.6.3 diff -u -d -r1.8.6.2 -r1.8.6.3 --- riscospath.py 7 Jan 2005 06:58:45 -0000 1.8.6.2 +++ riscospath.py 16 Oct 2005 05:24:00 -0000 1.8.6.3 @@ -168,23 +168,16 @@ return split(p)[0] -def commonprefix(ps): - """ - Return the longest prefix of all list elements. Purely string-based; does not - separate any path parts. Why am I in os.path? - """ - if len(ps)==0: - return '' - prefix= ps[0] - for p in ps[1:]: - prefix= prefix[:len(p)] - for i in range(len(prefix)): - if prefix[i] <> p[i]: - prefix= prefix[:i] - if i==0: - return '' - break - return prefix +def commonprefix(m): + "Given a list of pathnames, returns the longest common leading component" + if not m: return '' + s1 = min(m) + s2 = max(m) + n = min(len(s1), len(s2)) + for i in xrange(n): + if s1[i] != s2[i]: + return s1[:i] + return s1[:n] ## File access functions. Why are we in os.path? 
From jhylton at users.sourceforge.net Sun Oct 16 07:24:33 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/distutils versionpredicate.py, NONE, 1.4.2.2 core.py, 1.50.2.2, 1.50.2.3 dir_util.py, 1.9.2.2, 1.9.2.3 dist.py, 1.55.2.2, 1.55.2.3 msvccompiler.py, 1.49.2.2, 1.49.2.3 sysconfig.py, 1.49.2.4, 1.49.2.5 Message-ID: <20051016052433.E0B741E4016@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/distutils In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/distutils Modified Files: Tag: ast-branch core.py dir_util.py dist.py msvccompiler.py sysconfig.py Added Files: Tag: ast-branch versionpredicate.py Log Message: Merge head to branch (for the last time) --- NEW FILE: versionpredicate.py --- """Module for parsing and testing package version predicate strings. """ import re import distutils.version import operator re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)") # (package) (rest) re_paren = re.compile(r"^\s*\((.*)\)\s*$") # (list) inside of parentheses re_splitComparison = re.compile(r"^\s*(<=|>=|<|>|!=|==)\s*([^\s,]+)\s*$") # (comp) (version) def splitUp(pred): """Parse a single version comparison. Return (comparison string, StrictVersion) """ res = re_splitComparison.match(pred) if not res: raise ValueError("bad package restriction syntax: %r" % pred) comp, verStr = res.groups() return (comp, distutils.version.StrictVersion(verStr)) compmap = {"<": operator.lt, "<=": operator.le, "==": operator.eq, ">": operator.gt, ">=": operator.ge, "!=": operator.ne} class VersionPredicate: """Parse and test package version predicates. 
>>> v = VersionPredicate('pyepat.abc (>1.0, <3333.3a1, !=1555.1b3)') The `name` attribute provides the full dotted name that is given:: >>> v.name 'pyepat.abc' The str() of a `VersionPredicate` provides a normalized human-readable version of the expression:: >>> print v pyepat.abc (> 1.0, < 3333.3a1, != 1555.1b3) The `satisfied_by()` method can be used to determine with a given version number is included in the set described by the version restrictions:: >>> v.satisfied_by('1.1') True >>> v.satisfied_by('1.4') True >>> v.satisfied_by('1.0') False >>> v.satisfied_by('4444.4') False >>> v.satisfied_by('1555.1b3') False `VersionPredicate` is flexible in accepting extra whitespace:: >>> v = VersionPredicate(' pat( == 0.1 ) ') >>> v.name 'pat' >>> v.satisfied_by('0.1') True >>> v.satisfied_by('0.2') False If any version numbers passed in do not conform to the restrictions of `StrictVersion`, a `ValueError` is raised:: >>> v = VersionPredicate('p1.p2.p3.p4(>=1.0, <=1.3a1, !=1.2zb3)') Traceback (most recent call last): ... ValueError: invalid version number '1.2zb3' It the module or package name given does not conform to what's allowed as a legal module or package name, `ValueError` is raised:: >>> v = VersionPredicate('foo-bar') Traceback (most recent call last): ... ValueError: expected parenthesized list: '-bar' >>> v = VersionPredicate('foo bar (12.21)') Traceback (most recent call last): ... ValueError: expected parenthesized list: 'bar (12.21)' """ def __init__(self, versionPredicateStr): """Parse a version predicate string. 
""" # Fields: # name: package name # pred: list of (comparison string, StrictVersion) versionPredicateStr = versionPredicateStr.strip() if not versionPredicateStr: raise ValueError("empty package restriction") match = re_validPackage.match(versionPredicateStr) if not match: raise ValueError("bad package name in %r" % versionPredicateStr) self.name, paren = match.groups() paren = paren.strip() if paren: match = re_paren.match(paren) if not match: raise ValueError("expected parenthesized list: %r" % paren) str = match.groups()[0] self.pred = [splitUp(aPred) for aPred in str.split(",")] if not self.pred: raise ValueError("empty parenthesized list in %r" % versionPredicateStr) else: self.pred = [] def __str__(self): if self.pred: seq = [cond + " " + str(ver) for cond, ver in self.pred] return self.name + " (" + ", ".join(seq) + ")" else: return self.name def satisfied_by(self, version): """True if version is compatible with all the predicates in self. The parameter version must be acceptable to the StrictVersion constructor. It may be either a string or StrictVersion. """ for cond, ver in self.pred: if not compmap[cond](version, ver): return False return True _provision_rx = None def split_provision(value): """Return the name and optional version number of a provision. The version number, if given, will be returned as a `StrictVersion` instance, otherwise it will be `None`. 
>>> split_provision('mypkg') ('mypkg', None) >>> split_provision(' mypkg( 1.2 ) ') ('mypkg', StrictVersion ('1.2')) """ global _provision_rx if _provision_rx is None: _provision_rx = re.compile( "([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$") value = value.strip() m = _provision_rx.match(value) if not m: raise ValueError("illegal provides specification: %r" % value) ver = m.group(2) or None if ver: ver = distutils.version.StrictVersion(ver) return m.group(1), ver Index: core.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/core.py,v retrieving revision 1.50.2.2 retrieving revision 1.50.2.3 diff -u -d -r1.50.2.2 -r1.50.2.3 --- core.py 7 Jan 2005 06:58:15 -0000 1.50.2.2 +++ core.py 16 Oct 2005 05:24:00 -0000 1.50.2.3 @@ -47,7 +47,9 @@ 'name', 'version', 'author', 'author_email', 'maintainer', 'maintainer_email', 'url', 'license', 'description', 'long_description', 'keywords', - 'platforms', 'classifiers', 'download_url',) + 'platforms', 'classifiers', 'download_url', + 'requires', 'provides', 'obsoletes', + ) # Legal keyword arguments for the Extension constructor extension_keywords = ('name', 'sources', 'include_dirs', Index: dir_util.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/dir_util.py,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- dir_util.py 7 Jan 2005 06:58:16 -0000 1.9.2.2 +++ dir_util.py 16 Oct 2005 05:24:00 -0000 1.9.2.3 @@ -31,7 +31,7 @@ global _path_created # Detect a common bug -- name is None - if type(name) is not StringType: + if not isinstance(name, StringTypes): raise DistutilsInternalError, \ "mkpath: 'name' must be a string (got %r)" % (name,) Index: dist.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/dist.py,v retrieving revision 1.55.2.2 
retrieving revision 1.55.2.3 diff -u -d -r1.55.2.2 -r1.55.2.3 --- dist.py 7 Jan 2005 06:58:16 -0000 1.55.2.2 +++ dist.py 16 Oct 2005 05:24:00 -0000 1.55.2.3 @@ -59,6 +59,15 @@ ('help', 'h', "show detailed help message"), ] + # 'common_usage' is a short (2-3 line) string describing the common + # usage of the setup script. + common_usage = """\ +Common commands: (see '--help-commands' for more) + + setup.py build will build the package underneath 'build/' + setup.py install will install the package +""" + # options that are not propagated to the commands display_options = [ ('help-commands', None, @@ -97,6 +106,12 @@ "print the list of classifiers"), ('keywords', None, "print the list of keywords"), + ('provides', None, + "print the list of packages/modules provided"), + ('requires', None, + "print the list of packages/modules required"), + ('obsoletes', None, + "print the list of packages/modules made obsolete") ] display_option_names = map(lambda x: translate_longopt(x[0]), display_options) @@ -162,6 +177,17 @@ # command_options = { command_name : { option : (source, value) } } self.command_options = {} + # 'dist_files' is the list of (command, pyversion, file) that + # have been created by any dist commands run so far. This is + # filled regardless of whether the run is dry or not. pyversion + # gives sysconfig.get_python_version() if the dist file is + # specific to a Python version, 'any' if it is good for all + # Python versions on the target platform, and '' for a source + # file. pyversion should not be used to specify minimum or + # maximum required Python versions; use the metainfo for that + # instead. + self.dist_files = [] + # These options are really the business of various commands, rather # than of the Distribution itself. We provide aliases for them in # Distribution as a convenience to the developer. @@ -201,7 +227,6 @@ # distribution options. if attrs: - # Pull out the set of command options and work on them # specifically. 
Note that this order guarantees that aliased # command options will override any supplied redundantly @@ -226,7 +251,9 @@ # Now work on the rest of the attributes. Any attribute that's # not already defined is invalid! for (key,val) in attrs.items(): - if hasattr(self.metadata, key): + if hasattr(self.metadata, "set_" + key): + getattr(self.metadata, "set_" + key)(val) + elif hasattr(self.metadata, key): setattr(self.metadata, key, val) elif hasattr(self, key): setattr(self, key, val) @@ -608,7 +635,7 @@ else: options = self.global_options parser.set_option_table(options) - parser.print_help("Global options:") + parser.print_help(self.common_usage + "\nGlobal options:") print if display_options: @@ -669,7 +696,8 @@ value = getattr(self.metadata, "get_"+opt)() if opt in ['keywords', 'platforms']: print string.join(value, ',') - elif opt == 'classifiers': + elif opt in ('classifiers', 'provides', 'requires', + 'obsoletes'): print string.join(value, '\n') else: print value @@ -1015,7 +1043,10 @@ "license", "description", "long_description", "keywords", "platforms", "fullname", "contact", "contact_email", "license", "classifiers", - "download_url") + "download_url", + # PEP 314 + "provides", "requires", "obsoletes", + ) def __init__ (self): self.name = None @@ -1032,40 +1063,58 @@ self.platforms = None self.classifiers = None self.download_url = None + # PEP 314 + self.provides = None + self.requires = None + self.obsoletes = None def write_pkg_info (self, base_dir): """Write the PKG-INFO file into the release tree. 
""" - pkg_info = open( os.path.join(base_dir, 'PKG-INFO'), 'w') - pkg_info.write('Metadata-Version: 1.0\n') - pkg_info.write('Name: %s\n' % self.get_name() ) - pkg_info.write('Version: %s\n' % self.get_version() ) - pkg_info.write('Summary: %s\n' % self.get_description() ) - pkg_info.write('Home-page: %s\n' % self.get_url() ) - pkg_info.write('Author: %s\n' % self.get_contact() ) - pkg_info.write('Author-email: %s\n' % self.get_contact_email() ) - pkg_info.write('License: %s\n' % self.get_license() ) + self.write_pkg_file(pkg_info) + + pkg_info.close() + + # write_pkg_info () + + def write_pkg_file (self, file): + """Write the PKG-INFO format data to a file object. + """ + version = '1.0' + if self.provides or self.requires or self.obsoletes: + version = '1.1' + + file.write('Metadata-Version: %s\n' % version) + file.write('Name: %s\n' % self.get_name() ) + file.write('Version: %s\n' % self.get_version() ) + file.write('Summary: %s\n' % self.get_description() ) + file.write('Home-page: %s\n' % self.get_url() ) + file.write('Author: %s\n' % self.get_contact() ) + file.write('Author-email: %s\n' % self.get_contact_email() ) + file.write('License: %s\n' % self.get_license() ) if self.download_url: - pkg_info.write('Download-URL: %s\n' % self.download_url) + file.write('Download-URL: %s\n' % self.download_url) long_desc = rfc822_escape( self.get_long_description() ) - pkg_info.write('Description: %s\n' % long_desc) + file.write('Description: %s\n' % long_desc) keywords = string.join( self.get_keywords(), ',') if keywords: - pkg_info.write('Keywords: %s\n' % keywords ) - - for platform in self.get_platforms(): - pkg_info.write('Platform: %s\n' % platform ) + file.write('Keywords: %s\n' % keywords ) - for classifier in self.get_classifiers(): - pkg_info.write('Classifier: %s\n' % classifier ) + self._write_list(file, 'Platform', self.get_platforms()) + self._write_list(file, 'Classifier', self.get_classifiers()) - pkg_info.close() + # PEP 314 + self._write_list(file, 
'Requires', self.get_requires()) + self._write_list(file, 'Provides', self.get_provides()) + self._write_list(file, 'Obsoletes', self.get_obsoletes()) - # write_pkg_info () + def _write_list (self, file, name, values): + for value in values: + file.write('%s: %s\n' % (name, value)) # -- Metadata query methods ---------------------------------------- @@ -1125,6 +1174,36 @@ def get_download_url(self): return self.download_url or "UNKNOWN" + # PEP 314 + + def get_requires(self): + return self.requires or [] + + def set_requires(self, value): + import distutils.versionpredicate + for v in value: + distutils.versionpredicate.VersionPredicate(v) + self.requires = value + + def get_provides(self): + return self.provides or [] + + def set_provides(self, value): + value = [v.strip() for v in value] + for v in value: + import distutils.versionpredicate + distutils.versionpredicate.split_provision(v) + self.provides = value + + def get_obsoletes(self): + return self.obsoletes or [] + + def set_obsoletes(self, value): + import distutils.versionpredicate + for v in value: + distutils.versionpredicate.VersionPredicate(v) + self.obsoletes = value + # class DistributionMetadata Index: msvccompiler.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/msvccompiler.py,v retrieving revision 1.49.2.2 retrieving revision 1.49.2.3 diff -u -d -r1.49.2.2 -r1.49.2.3 --- msvccompiler.py 7 Jan 2005 06:58:16 -0000 1.49.2.2 +++ msvccompiler.py 16 Oct 2005 05:24:00 -0000 1.49.2.3 @@ -211,6 +211,9 @@ self.__macros = MacroExpander(self.__version) else: self.__root = r"Software\Microsoft\Devstudio" + self.initialized = False + + def initialize(self): self.__paths = self.get_msvc_paths("path") if len (self.__paths) == 0: @@ -252,6 +255,7 @@ ] self.ldflags_static = [ '/nologo'] + self.initialized = True # -- Worker methods ------------------------------------------------ @@ -265,6 +269,8 @@ obj_names = [] for src_name in 
source_filenames: (base, ext) = os.path.splitext (src_name) + base = os.path.splitdrive(base)[1] # Chop off the drive + base = base[os.path.isabs(base):] # If abs, chop off leading / if ext not in self.src_extensions: # Better to raise an exception instead of silently continuing # and later complain about sources and targets having @@ -290,6 +296,7 @@ output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None): + if not self.initialized: self.initialize() macros, objects, extra_postargs, pp_opts, build = \ self._setup_compile(output_dir, macros, include_dirs, sources, depends, extra_postargs) @@ -381,6 +388,7 @@ debug=0, target_lang=None): + if not self.initialized: self.initialize() (objects, output_dir) = self._fix_object_args (objects, output_dir) output_filename = \ self.library_filename (output_libname, output_dir=output_dir) @@ -414,6 +422,7 @@ build_temp=None, target_lang=None): + if not self.initialized: self.initialize() (objects, output_dir) = self._fix_object_args (objects, output_dir) (libraries, library_dirs, runtime_library_dirs) = \ self._fix_lib_args (libraries, library_dirs, runtime_library_dirs) Index: sysconfig.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/sysconfig.py,v retrieving revision 1.49.2.4 retrieving revision 1.49.2.5 diff -u -d -r1.49.2.4 -r1.49.2.5 --- sysconfig.py 7 Jan 2005 06:58:16 -0000 1.49.2.4 +++ sysconfig.py 16 Oct 2005 05:24:00 -0000 1.49.2.5 @@ -34,7 +34,7 @@ del argv0_path, landmark -def get_python_version (): +def get_python_version(): """Return a string containing the major and minor Python version, leaving off the patchlevel. Sample return values could be '1.5' or '2.2'. 
@@ -65,7 +65,7 @@ if not os.path.exists(inc_dir): inc_dir = os.path.join(os.path.dirname(base), "Include") return inc_dir - return os.path.join(prefix, "include", "python" + sys.version[:3]) + return os.path.join(prefix, "include", "python" + get_python_version()) elif os.name == "nt": return os.path.join(prefix, "include") elif os.name == "mac": @@ -110,7 +110,7 @@ if standard_lib: return os.path.join(prefix, "Lib") else: - if sys.version < "2.2": + if get_python_version() < "2.2": return prefix else: return os.path.join(PREFIX, "Lib", "site-packages") @@ -146,8 +146,9 @@ varies across Unices and is stored in Python's Makefile. """ if compiler.compiler_type == "unix": - (cc, cxx, opt, basecflags, ccshared, ldshared, so_ext) = \ - get_config_vars('CC', 'CXX', 'OPT', 'BASECFLAGS', 'CCSHARED', 'LDSHARED', 'SO') + (cc, cxx, opt, cflags, ccshared, ldshared, so_ext) = \ + get_config_vars('CC', 'CXX', 'OPT', 'CFLAGS', + 'CCSHARED', 'LDSHARED', 'SO') if os.environ.has_key('CC'): cc = os.environ['CC'] @@ -161,17 +162,15 @@ cpp = cc + " -E" # not always if os.environ.has_key('LDFLAGS'): ldshared = ldshared + ' ' + os.environ['LDFLAGS'] - if basecflags: - opt = basecflags + ' ' + opt if os.environ.has_key('CFLAGS'): - opt = opt + ' ' + os.environ['CFLAGS'] + cflags = opt + ' ' + os.environ['CFLAGS'] ldshared = ldshared + ' ' + os.environ['CFLAGS'] if os.environ.has_key('CPPFLAGS'): cpp = cpp + ' ' + os.environ['CPPFLAGS'] - opt = opt + ' ' + os.environ['CPPFLAGS'] + cflags = cflags + ' ' + os.environ['CPPFLAGS'] ldshared = ldshared + ' ' + os.environ['CPPFLAGS'] - cc_cmd = cc + ' ' + opt + cc_cmd = cc + ' ' + cflags compiler.set_executables( preprocessor=cpp, compiler=cc_cmd, @@ -189,7 +188,7 @@ inc_dir = os.curdir else: inc_dir = get_python_inc(plat_specific=1) - if sys.version < '2.2': + if get_python_version() < '2.2': config_h = 'config.h' else: # The name of the config.h file changed in 2.2 @@ -277,25 +276,20 @@ m = _findvar1_rx.search(value) or 
_findvar2_rx.search(value) if m: n = m.group(1) + found = True if done.has_key(n): - after = value[m.end():] - value = value[:m.start()] + str(done[n]) + after - if "$" in after: - notdone[name] = value - else: - try: value = int(value) - except ValueError: - done[name] = string.strip(value) - else: - done[name] = value - del notdone[name] + item = str(done[n]) elif notdone.has_key(n): # get it on a subsequent round - pass + found = False + elif os.environ.has_key(n): + # do it like make: fall back to environment + item = os.environ[n] else: - done[n] = "" + done[n] = item = "" + if found: after = value[m.end():] - value = value[:m.start()] + after + value = value[:m.start()] + item + after if "$" in after: notdone[name] = value else: @@ -378,7 +372,7 @@ if python_build: g['LDSHARED'] = g['BLDSHARED'] - elif sys.version < '2.1': + elif get_python_version() < '2.1': # The following two branches are for 1.5.2 compatibility. if sys.platform == 'aix4': # what about AIX 3.x ? # Linker script is in the config directory, not in Modules as the @@ -405,7 +399,7 @@ # it's taken care of for them by the 'build_ext.get_libraries()' # method.) 
g['LDSHARED'] = ("%s -L%s/lib -lpython%s" % - (linkerscript, PREFIX, sys.version[0:3])) + (linkerscript, PREFIX, get_python_version())) global _config_vars _config_vars = g From jhylton at users.sourceforge.net Sun Oct 16 07:24:34 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/distutils/command upload.py, NONE, 1.9.2.2 bdist_dumb.py, 1.20.2.2, 1.20.2.3 bdist_rpm.py, 1.29.2.2, 1.29.2.3 bdist_wininst.py, 1.33.2.2, 1.33.2.3 clean.py, 1.14.2.2, 1.14.2.3 install.py, 1.64.2.2, 1.64.2.3 register.py, 1.6.4.2, 1.6.4.3 sdist.py, 1.54.2.2, 1.54.2.3 wininst-6.exe, 1.7.2.1, 1.7.2.2 wininst-7.1.exe, 1.7.2.1, 1.7.2.2 Message-ID: <20051016052434.2C4B01E400D@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/distutils/command In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/distutils/command Modified Files: Tag: ast-branch bdist_dumb.py bdist_rpm.py bdist_wininst.py clean.py install.py register.py sdist.py wininst-6.exe wininst-7.1.exe Added Files: Tag: ast-branch upload.py Log Message: Merge head to branch (for the last time) --- NEW FILE: upload.py --- """distutils.command.upload Implements the Distutils 'upload' subcommand (upload package to PyPI).""" from distutils.errors import * from distutils.core import Command from distutils.spawn import spawn from distutils import log from md5 import md5 import os import socket import platform import ConfigParser import httplib import base64 import urlparse import cStringIO as StringIO class upload(Command): description = "upload binary package to PyPI" DEFAULT_REPOSITORY = 'http://www.python.org/pypi' user_options = [ ('repository=', 'r', "url of repository [default: %s]" % DEFAULT_REPOSITORY), ('show-response', None, 'display full response text from server'), ('sign', 's', 'sign files to upload using gpg'), ] boolean_options = ['show-response', 'sign'] def initialize_options(self): 
self.username = '' self.password = '' self.repository = '' self.show_response = 0 self.sign = False def finalize_options(self): if os.environ.has_key('HOME'): rc = os.path.join(os.environ['HOME'], '.pypirc') if os.path.exists(rc): self.announce('Using PyPI login from %s' % rc) config = ConfigParser.ConfigParser({ 'username':'', 'password':'', 'repository':''}) config.read(rc) if not self.repository: self.repository = config.get('server-login', 'repository') if not self.username: self.username = config.get('server-login', 'username') if not self.password: self.password = config.get('server-login', 'password') if not self.repository: self.repository = self.DEFAULT_REPOSITORY def run(self): if not self.distribution.dist_files: raise DistutilsOptionError("No dist file created in earlier command") for command, pyversion, filename in self.distribution.dist_files: self.upload_file(command, pyversion, filename) def upload_file(self, command, pyversion, filename): # Sign if requested if self.sign: spawn(("gpg", "--detach-sign", "-a", filename), dry_run=self.dry_run) # Fill in the data content = open(filename,'rb').read() data = { ':action':'file_upload', 'protcol_version':'1', 'name':self.distribution.get_name(), 'version':self.distribution.get_version(), 'content':(os.path.basename(filename),content), 'filetype':command, 'pyversion':pyversion, 'md5_digest':md5(content).hexdigest(), } comment = '' if command == 'bdist_rpm': dist, version, id = platform.dist() if dist: comment = 'built for %s %s' % (dist, version) elif command == 'bdist_dumb': comment = 'built for %s' % platform.platform(terse=1) data['comment'] = comment if self.sign: data['gpg_signature'] = (os.path.basename(filename) + ".asc", open(filename+".asc").read()) # set up the authentication auth = "Basic " + base64.encodestring(self.username + ":" + self.password).strip() # Build up the MIME payload for the POST data boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' sep_boundary = '\n--' + 
boundary end_boundary = sep_boundary + '--' body = StringIO.StringIO() for key, value in data.items(): # handle multiple entries for the same name if type(value) != type([]): value = [value] for value in value: if type(value) is tuple: fn = ';filename="%s"' % value[0] value = value[1] else: fn = "" value = str(value) body.write(sep_boundary) body.write('\nContent-Disposition: form-data; name="%s"'%key) body.write(fn) body.write("\n\n") body.write(value) if value and value[-1] == '\r': body.write('\n') # write an extra newline (lurve Macs) body.write(end_boundary) body.write("\n") body = body.getvalue() self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO) # build the Request # We can't use urllib2 since we need to send the Basic # auth right with the first request schema, netloc, url, params, query, fragments = \ urlparse.urlparse(self.repository) assert not params and not query and not fragments if schema == 'http': http = httplib.HTTPConnection(netloc) elif schema == 'https': http = httplib.HTTPSConnection(netloc) else: raise AssertionError, "unsupported schema "+schema data = '' loglevel = log.INFO try: http.connect() http.putrequest("POST", url) http.putheader('Content-type', 'multipart/form-data; boundary=%s'%boundary) http.putheader('Content-length', str(len(body))) http.putheader('Authorization', auth) http.endheaders() http.send(body) except socket.error, e: self.announce(e.msg, log.ERROR) return r = http.getresponse() if r.status == 200: self.announce('Server response (%s): %s' % (r.status, r.reason), log.INFO) else: self.announce('Upload failed (%s): %s' % (r.status, r.reason), log.ERROR) if self.show_response: print '-'*75, r.read(), '-'*75 Index: bdist_dumb.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/bdist_dumb.py,v retrieving revision 1.20.2.2 retrieving revision 1.20.2.3 diff -u -d -r1.20.2.2 -r1.20.2.3 --- bdist_dumb.py 7 Jan 2005 
06:58:17 -0000 1.20.2.2 +++ bdist_dumb.py 16 Oct 2005 05:24:00 -0000 1.20.2.3 @@ -13,6 +13,7 @@ from distutils.util import get_platform from distutils.dir_util import create_tree, remove_tree, ensure_relative from distutils.errors import * +from distutils.sysconfig import get_python_version from distutils import log class bdist_dumb (Command): @@ -117,8 +118,14 @@ ensure_relative(install.install_base)) # Make the archive - self.make_archive(pseudoinstall_root, - self.format, root_dir=archive_root) + filename = self.make_archive(pseudoinstall_root, + self.format, root_dir=archive_root) + if self.distribution.has_ext_modules(): + pyversion = get_python_version() + else: + pyversion = 'any' + self.distribution.dist_files.append(('bdist_dumb', pyversion, + filename)) if not self.keep_temp: remove_tree(self.bdist_dir, dry_run=self.dry_run) Index: bdist_rpm.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/bdist_rpm.py,v retrieving revision 1.29.2.2 retrieving revision 1.29.2.3 diff -u -d -r1.29.2.2 -r1.29.2.3 --- bdist_rpm.py 7 Jan 2005 06:58:17 -0000 1.29.2.2 +++ bdist_rpm.py 16 Oct 2005 05:24:00 -0000 1.29.2.3 @@ -15,6 +15,7 @@ from distutils.util import get_platform from distutils.file_util import write_file from distutils.errors import * +from distutils.sysconfig import get_python_version from distutils import log class bdist_rpm (Command): @@ -297,12 +298,14 @@ # Make a source distribution and copy to SOURCES directory with # optional icon. 
+ saved_dist_files = self.distribution.dist_files[:] sdist = self.reinitialize_command('sdist') if self.use_bzip2: sdist.formats = ['bztar'] else: sdist.formats = ['gztar'] self.run_command('sdist') + self.distribution.dist_files = saved_dist_files source = sdist.get_archive_files()[0] source_dir = rpm_dir['SOURCES'] @@ -344,21 +347,31 @@ srpms = glob.glob(os.path.join(rpm_dir['SRPMS'], "*.rpm")) assert len(srpms) == 1, \ "unexpected number of SRPM files found: %s" % srpms + dist_file = ('bdist_rpm', 'any', + self._dist_path(srpms[0])) + self.distribution.dist_files.append(dist_file) self.move_file(srpms[0], self.dist_dir) if not self.source_only: rpms = glob.glob(os.path.join(rpm_dir['RPMS'], "*/*.rpm")) - debuginfo = glob.glob(os.path.join(rpm_dir['RPMS'], \ + debuginfo = glob.glob(os.path.join(rpm_dir['RPMS'], "*/*debuginfo*.rpm")) if debuginfo: rpms.remove(debuginfo[0]) assert len(rpms) == 1, \ "unexpected number of RPM files found: %s" % rpms + dist_file = ('bdist_rpm', get_python_version(), + self._dist_path(rpms[0])) + self.distribution.dist_files.append(dist_file) self.move_file(rpms[0], self.dist_dir) if debuginfo: + dist_file = ('bdist_rpm', get_python_version(), + self._dist_path(debuginfo[0])) self.move_file(debuginfo[0], self.dist_dir) # run() + def _dist_path(self, path): + return os.path.join(self.dist_dir, os.path.basename(path)) def _make_spec_file(self): """Generate the text of an RPM spec file and return it as a Index: bdist_wininst.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/bdist_wininst.py,v retrieving revision 1.33.2.2 retrieving revision 1.33.2.3 diff -u -d -r1.33.2.2 -r1.33.2.3 --- bdist_wininst.py 7 Jan 2005 06:58:17 -0000 1.33.2.2 +++ bdist_wininst.py 16 Oct 2005 05:24:00 -0000 1.33.2.3 @@ -162,6 +162,12 @@ root_dir=self.bdist_dir) # create an exe containing the zip-file self.create_exe(arcname, fullname, self.bitmap) + if 
self.distribution.has_ext_modules(): + pyversion = get_python_version() + else: + pyversion = 'any' + self.distribution.dist_files.append(('bdist_wininst', pyversion, + self.get_installer_filename(fullname))) # remove the zip-file again log.debug("removing temporary file '%s'", arcname) os.remove(arcname) Index: clean.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/clean.py,v retrieving revision 1.14.2.2 retrieving revision 1.14.2.3 diff -u -d -r1.14.2.2 -r1.14.2.3 --- clean.py 7 Jan 2005 06:58:18 -0000 1.14.2.2 +++ clean.py 16 Oct 2005 05:24:00 -0000 1.14.2.3 @@ -15,7 +15,7 @@ class clean (Command): - description = "clean up output of 'build' command" + description = "clean up temporary files from 'build' command" user_options = [ ('build-base=', 'b', "base build directory (default: 'build.build-base')"), Index: install.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/install.py,v retrieving revision 1.64.2.2 retrieving revision 1.64.2.3 diff -u -d -r1.64.2.2 -r1.64.2.3 --- install.py 7 Jan 2005 06:58:18 -0000 1.64.2.2 +++ install.py 16 Oct 2005 05:24:00 -0000 1.64.2.3 @@ -352,8 +352,13 @@ opt_name = opt[0] if opt_name[-1] == "=": opt_name = opt_name[0:-1] - opt_name = string.translate(opt_name, longopt_xlate) - val = getattr(self, opt_name) + if self.negative_opt.has_key(opt_name): + opt_name = string.translate(self.negative_opt[opt_name], + longopt_xlate) + val = not getattr(self, opt_name) + else: + opt_name = string.translate(opt_name, longopt_xlate) + val = getattr(self, opt_name) print " %s: %s" % (opt_name, val) Index: register.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/register.py,v retrieving revision 1.6.4.2 retrieving revision 1.6.4.3 diff -u -d -r1.6.4.2 -r1.6.4.3 --- register.py 7 Jan 2005 
06:58:18 -0000 1.6.4.2 +++ register.py 16 Oct 2005 05:24:00 -0000 1.6.4.3 @@ -231,7 +231,13 @@ 'platform': meta.get_platforms(), 'classifiers': meta.get_classifiers(), 'download_url': meta.get_download_url(), + # PEP 314 + 'provides': meta.get_provides(), + 'requires': meta.get_requires(), + 'obsoletes': meta.get_obsoletes(), } + if data['provides'] or data['requires'] or data['obsoletes']: + data['metadata_version'] = '1.1' return data def post_to_server(self, data, auth=None): @@ -248,7 +254,7 @@ if type(value) != type([]): value = [value] for value in value: - value = str(value) + value = unicode(value).encode("utf-8") body.write(sep_boundary) body.write('\nContent-Disposition: form-data; name="%s"'%key) body.write("\n\n") @@ -261,7 +267,7 @@ # build the Request headers = { - 'Content-type': 'multipart/form-data; boundary=%s'%boundary, + 'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary, 'Content-length': str(len(body)) } req = urllib2.Request(self.repository, body, headers) Index: sdist.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/sdist.py,v retrieving revision 1.54.2.2 retrieving revision 1.54.2.3 diff -u -d -r1.54.2.2 -r1.54.2.3 --- sdist.py 7 Jan 2005 06:58:18 -0000 1.54.2.2 +++ sdist.py 16 Oct 2005 05:24:00 -0000 1.54.2.3 @@ -449,6 +449,7 @@ for fmt in self.formats: file = self.make_archive(base_name, fmt, base_dir=base_dir) archive_files.append(file) + self.distribution.dist_files.append(('sdist', '', file)) self.archive_files = archive_files Index: wininst-6.exe =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/distutils/command/wininst-6.exe,v retrieving revision 1.7.2.1 retrieving revision 1.7.2.2 diff -u -d -r1.7.2.1 -r1.7.2.2 Binary files /tmp/cvsyel2jA and /tmp/cvsgGcGn6 differ Index: wininst-7.1.exe =================================================================== RCS 
file: /cvsroot/python/python/dist/src/Lib/distutils/command/wininst-7.1.exe,v retrieving revision 1.7.2.1 retrieving revision 1.7.2.2 diff -u -d -r1.7.2.1 -r1.7.2.2 Binary files /tmp/cvstfihKA and /tmp/cvsxCmPU6 differ From jhylton at users.sourceforge.net Sun Oct 16 07:24:34 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/xml/dom minidom.py, 1.48.2.2, 1.48.2.3 Message-ID: <20051016052434.5B4551E4012@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/xml/dom In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/xml/dom Modified Files: Tag: ast-branch minidom.py Log Message: Merge head to branch (for the last time) Index: minidom.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/xml/dom/minidom.py,v retrieving revision 1.48.2.2 retrieving revision 1.48.2.3 diff -u -d -r1.48.2.2 -r1.48.2.3 --- minidom.py 7 Jan 2005 06:59:21 -0000 1.48.2.2 +++ minidom.py 16 Oct 2005 05:24:01 -0000 1.48.2.3 @@ -1278,15 +1278,15 @@ writer.write("<!DOCTYPE ") writer.write(self.name) if self.publicId: - writer.write("\n PUBLIC '%s'\n '%s'" - % (self.publicId, self.systemId)) + writer.write("%s PUBLIC '%s'%s '%s'" + % (newl, self.publicId, newl, self.systemId)) elif self.systemId: - writer.write("\n SYSTEM '%s'" % self.systemId) + writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) if self.internalSubset is not None: writer.write(" [") writer.write(self.internalSubset) writer.write("]") - writer.write(">\n") + writer.write(">"+newl) class Entity(Identified, Node): attributes = None @@ -1739,9 +1739,9 @@ def writexml(self, writer, indent="", addindent="", newl="", encoding = None): if encoding is None: - writer.write('<?xml version="1.0" ?>\n') + writer.write('<?xml version="1.0" ?>'+newl) else: - writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding) + writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) for node in self.childNodes: node.writexml(writer, indent, addindent, newl) From jhylton at users.sourceforge.net Sun Oct 16 07:24:34 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/xml/sax saxutils.py, 1.16.18.2, 1.16.18.3 Message-ID: <20051016052434.6F8D01E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/xml/sax In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/xml/sax
Modified Files: Tag: ast-branch saxutils.py Log Message: Merge head to branch (for the last time) Index: saxutils.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/xml/sax/saxutils.py,v retrieving revision 1.16.18.2 retrieving revision 1.16.18.3 diff -u -d -r1.16.18.2 -r1.16.18.3 --- saxutils.py 7 Jan 2005 06:59:22 -0000 1.16.18.2 +++ saxutils.py 16 Oct 2005 05:24:01 -0000 1.16.18.3 @@ -232,7 +232,7 @@ # EntityResolver methods def resolveEntity(self, publicId, systemId): - self._ent_handler.resolveEntity(publicId, systemId) + return self._ent_handler.resolveEntity(publicId, systemId) # XMLReader methods From jhylton at users.sourceforge.net Sun Oct 16 07:24:34 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test/output test_xdrlib, NONE, 1.1.2.2 test_cookie, 1.7, 1.7.20.1 test_mmap, 1.8.8.1, 1.8.8.2 test_ossaudiodev, 1.1.8.2, 1.1.8.3 test_asynchat, 1.1, NONE Message-ID: <20051016052434.A73611E400E@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test/output In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/test/output Modified Files: Tag: ast-branch test_cookie test_mmap test_ossaudiodev Added Files: Tag: ast-branch test_xdrlib Removed Files: Tag: ast-branch test_asynchat Log Message: Merge head to branch (for the last time) --- NEW FILE: test_xdrlib --- test_xdrlib pack test 0 succeeded pack test 1 succeeded pack test 2 succeeded pack test 3 succeeded pack test 4 succeeded pack test 5 succeeded pack test 6 succeeded pack test 7 succeeded pack test 8 succeeded unpack test 0 succeeded : 9 unpack test 1 succeeded : True unpack test 2 succeeded : False unpack test 3 succeeded : 45 unpack test 4 succeeded : 1.89999997616 unpack test 5 succeeded : 1.9 unpack test 6 succeeded : hello world unpack test 7 succeeded : [0, 1, 2, 3, 4] unpack test 8 succeeded : 
['what', 'is', 'hapnin', 'doctor'] Index: test_cookie =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/output/test_cookie,v retrieving revision 1.7 retrieving revision 1.7.20.1 diff -u -d -r1.7 -r1.7.20.1 --- test_cookie 13 May 2001 00:19:31 -0000 1.7 +++ test_cookie 16 Oct 2005 05:24:01 -0000 1.7.20.1 @@ -1,31 +1,31 @@ test_cookie -Set-Cookie: chips=ahoy; -Set-Cookie: vienna=finger; +Set-Cookie: chips=ahoy +Set-Cookie: vienna=finger chips 'ahoy' 'ahoy' -Set-Cookie: chips=ahoy; +Set-Cookie: chips=ahoy vienna 'finger' 'finger' -Set-Cookie: vienna=finger; +Set-Cookie: vienna=finger -Set-Cookie: keebler="E=mc2; L=\"Loves\"; fudge=\012;"; +Set-Cookie: keebler="E=mc2; L=\"Loves\"; fudge=\012;" keebler 'E=mc2; L="Loves"; fudge=\n;' 'E=mc2; L="Loves"; fudge=\n;' -Set-Cookie: keebler="E=mc2; L=\"Loves\"; fudge=\012;"; +Set-Cookie: keebler="E=mc2; L=\"Loves\"; fudge=\012;" -Set-Cookie: keebler=E=mc2; +Set-Cookie: keebler=E=mc2 keebler 'E=mc2' 'E=mc2' -Set-Cookie: keebler=E=mc2; -Set-Cookie: Customer="WILE_E_COYOTE"; Path=/acme; +Set-Cookie: keebler=E=mc2 +Set-Cookie: Customer="WILE_E_COYOTE"; Path=/acme - - Index: test_mmap =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/output/test_mmap,v retrieving revision 1.8.8.1 retrieving revision 1.8.8.2 diff -u -d -r1.8.8.1 -r1.8.8.2 --- test_mmap 28 Apr 2003 17:27:17 -0000 1.8.8.1 +++ test_mmap 16 Oct 2005 05:24:01 -0000 1.8.8.2 @@ -31,4 +31,6 @@ Modifying copy-on-write memory map. Ensuring copy-on-write maps cannot be resized. Ensuring invalid access parameter raises exception. + Ensuring that passing 0 as map length sets map size to current file size. + Ensuring that passing 0 as map length sets map size to current file size. 
Test passed Index: test_ossaudiodev =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/output/test_ossaudiodev,v retrieving revision 1.1.8.2 retrieving revision 1.1.8.3 diff -u -d -r1.1.8.2 -r1.1.8.3 --- test_ossaudiodev 7 Jan 2005 06:59:20 -0000 1.1.8.2 +++ test_ossaudiodev 16 Oct 2005 05:24:01 -0000 1.1.8.3 @@ -1,6 +1,3 @@ test_ossaudiodev playing test sound file... -elapsed time: 2.9 sec -setparameters: got OSSAudioError as expected -setparameters: got OSSAudioError as expected -setparameters: got OSSAudioError as expected +elapsed time: 3.1 sec --- test_asynchat DELETED --- From jhylton at users.sourceforge.net Sun Oct 16 07:24:34 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/app _Appmodule.c, 1.11.2.2, 1.11.2.3 Message-ID: <20051016052434.9681B1E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/app In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/app Modified Files: Tag: ast-branch _Appmodule.c Log Message: Merge head to branch (for the last time) Index: _Appmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/app/_Appmodule.c,v retrieving revision 1.11.2.2 retrieving revision 1.11.2.3 diff -u -d -r1.11.2.2 -r1.11.2.3 --- _Appmodule.c 7 Jan 2005 07:01:03 -0000 1.11.2.2 +++ _Appmodule.c 16 Oct 2005 05:24:01 -0000 1.11.2.3 @@ -20,7 +20,7 @@ int ThemeButtonDrawInfo_Convert(PyObject *v, ThemeButtonDrawInfo *p_itself) { - return PyArg_Parse(v, "(iHH)", &p_itself->state, &p_itself->value, &p_itself->adornment); + return PyArg_Parse(v, "(iHH)", &p_itself->state, &p_itself->value, &p_itself->adornment); } @@ -45,6 +45,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int ThemeDrawingStateObj_Convert(PyObject *v, ThemeDrawingState *p_itself) { 
if (!ThemeDrawingStateObj_Check(v)) @@ -115,16 +116,16 @@ #define ThemeDrawingStateObj_tp_alloc PyType_GenericAlloc -static PyObject *ThemeDrawingStateObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *ThemeDrawingStateObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; ThemeDrawingState itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, ThemeDrawingStateObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((ThemeDrawingStateObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, ThemeDrawingStateObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((ThemeDrawingStateObject *)_self)->ob_itself = itself; + return _self; } #define ThemeDrawingStateObj_tp_free PyObject_Del From jhylton at users.sourceforge.net Sun Oct 16 07:24:34 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:34 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/ae _AEmodule.c, 1.11.2.2, 1.11.2.3 Message-ID: <20051016052434.AB2BD1E4011@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/ae In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/ae Modified Files: Tag: ast-branch _AEmodule.c Log Message: Merge head to branch (for the last time) Index: _AEmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/ae/_AEmodule.c,v retrieving revision 1.11.2.2 retrieving revision 1.11.2.3 diff -u -d -r1.11.2.2 -r1.11.2.3 --- _AEmodule.c 7 Jan 2005 07:01:03 -0000 1.11.2.2 +++ _AEmodule.c 16 Oct 2005 05:24:01 -0000 1.11.2.3 @@ -34,9 +34,9 @@ static pascal Boolean AEIdleProc(EventRecord *theEvent, long *sleepTime, RgnHandle *mouseRgn) { - if ( 
PyOS_InterruptOccurred() ) - return 1; - return 0; + if ( PyOS_InterruptOccurred() ) + return 1; + return 0; } AEIdleUPP upp_AEIdleProc; @@ -64,6 +64,7 @@ it->ob_owned = 1; return (PyObject *)it; } + int AEDesc_Convert(PyObject *v, AEDesc *p_itself) { if (!AEDesc_Check(v)) @@ -829,20 +830,20 @@ static PyObject *AEDesc_get_data(AEDescObject *self, void *closure) { - PyObject *res; - Size size; - char *ptr; - OSErr err; - - size = AEGetDescDataSize(&self->ob_itself); - if ( (res = PyString_FromStringAndSize(NULL, size)) == NULL ) - return NULL; - if ( (ptr = PyString_AsString(res)) == NULL ) - return NULL; - if ( (err=AEGetDescData(&self->ob_itself, ptr, size)) < 0 ) - return PyMac_Error(err); - return res; - + PyObject *res; + Size size; + char *ptr; + OSErr err; + + size = AEGetDescDataSize(&self->ob_itself); + if ( (res = PyString_FromStringAndSize(NULL, size)) == NULL ) + return NULL; + if ( (ptr = PyString_AsString(res)) == NULL ) + return NULL; + if ( (err=AEGetDescData(&self->ob_itself, ptr, size)) < 0 ) + return PyMac_Error(err); + return res; + } #define AEDesc_set_data NULL @@ -863,16 +864,16 @@ #define AEDesc_tp_alloc PyType_GenericAlloc -static PyObject *AEDesc_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *AEDesc_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; AEDesc itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, AEDesc_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((AEDescObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, AEDesc_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((AEDescObject *)_self)->ob_itself = itself; + return _self; } #define AEDesc_tp_free PyObject_Del @@ -1383,44 +1384,44 @@ static pascal OSErr GenericEventHandler(const AppleEvent *request, AppleEvent 
*reply, refcontype refcon) { - PyObject *handler = (PyObject *)refcon; - AEDescObject *requestObject, *replyObject; - PyObject *args, *res; - if ((requestObject = (AEDescObject *)AEDesc_New((AppleEvent *)request)) == NULL) { - return -1; - } - if ((replyObject = (AEDescObject *)AEDesc_New(reply)) == NULL) { - Py_DECREF(requestObject); - return -1; - } - if ((args = Py_BuildValue("OO", requestObject, replyObject)) == NULL) { - Py_DECREF(requestObject); - Py_DECREF(replyObject); - return -1; - } - res = PyEval_CallObject(handler, args); - requestObject->ob_itself.descriptorType = 'null'; - requestObject->ob_itself.dataHandle = NULL; - replyObject->ob_itself.descriptorType = 'null'; - replyObject->ob_itself.dataHandle = NULL; - Py_DECREF(args); - if (res == NULL) { - PySys_WriteStderr("Exception in AE event handler function\n"); - PyErr_Print(); - return -1; - } - Py_DECREF(res); - return noErr; + PyObject *handler = (PyObject *)refcon; + AEDescObject *requestObject, *replyObject; + PyObject *args, *res; + if ((requestObject = (AEDescObject *)AEDesc_New((AppleEvent *)request)) == NULL) { + return -1; + } + if ((replyObject = (AEDescObject *)AEDesc_New(reply)) == NULL) { + Py_DECREF(requestObject); + return -1; + } + if ((args = Py_BuildValue("OO", requestObject, replyObject)) == NULL) { + Py_DECREF(requestObject); + Py_DECREF(replyObject); + return -1; + } + res = PyEval_CallObject(handler, args); + requestObject->ob_itself.descriptorType = 'null'; + requestObject->ob_itself.dataHandle = NULL; + replyObject->ob_itself.descriptorType = 'null'; + replyObject->ob_itself.dataHandle = NULL; + Py_DECREF(args); + if (res == NULL) { + PySys_WriteStderr("Exception in AE event handler function\n"); + PyErr_Print(); + return -1; + } + Py_DECREF(res); + return noErr; } PyObject *AEDesc_NewBorrowed(AEDesc *itself) { - PyObject *it; - - it = AEDesc_New(itself); - if (it) - ((AEDescObject *)it)->ob_owned = 0; - return (PyObject *)it; + PyObject *it; + + it = AEDesc_New(itself); + if 
(it) + ((AEDescObject *)it)->ob_owned = 0; + return (PyObject *)it; } @@ -1432,11 +1433,11 @@ - upp_AEIdleProc = NewAEIdleUPP(AEIdleProc); - upp_GenericEventHandler = NewAEEventHandlerUPP(GenericEventHandler); - PyMac_INIT_TOOLBOX_OBJECT_NEW(AEDesc *, AEDesc_New); - PyMac_INIT_TOOLBOX_OBJECT_NEW(AEDesc *, AEDesc_NewBorrowed); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(AEDesc, AEDesc_Convert); + upp_AEIdleProc = NewAEIdleUPP(AEIdleProc); + upp_GenericEventHandler = NewAEEventHandlerUPP(GenericEventHandler); + PyMac_INIT_TOOLBOX_OBJECT_NEW(AEDesc *, AEDesc_New); + PyMac_INIT_TOOLBOX_OBJECT_NEW(AEDesc *, AEDesc_NewBorrowed); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(AEDesc, AEDesc_Convert); m = Py_InitModule("_AE", AE_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/cg _CGmodule.c, 1.6.2.2, 1.6.2.3 Message-ID: <20051016052435.0DB7E1E400A@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/cg In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/cg Modified Files: Tag: ast-branch _CGmodule.c Log Message: Merge head to branch (for the last time) Index: _CGmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/cg/_CGmodule.c,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- _CGmodule.c 7 Jan 2005 07:01:20 -0000 1.6.2.2 +++ _CGmodule.c 16 Oct 2005 05:24:01 -0000 1.6.2.3 @@ -26,67 +26,67 @@ PyObject *CGPoint_New(CGPoint *itself) { - return Py_BuildValue("(ff)", - itself->x, - itself->y); + return Py_BuildValue("(ff)", + itself->x, + itself->y); } int CGPoint_Convert(PyObject *v, CGPoint *p_itself) { - if( !PyArg_Parse(v, "(ff)", - &p_itself->x, - &p_itself->y) ) - return 0; - return 1; + if( !PyArg_Parse(v, "(ff)", + &p_itself->x, + 
&p_itself->y) ) + return 0; + return 1; } PyObject *CGRect_New(CGRect *itself) { - return Py_BuildValue("(ffff)", - itself->origin.x, - itself->origin.y, - itself->size.width, - itself->size.height); + return Py_BuildValue("(ffff)", + itself->origin.x, + itself->origin.y, + itself->size.width, + itself->size.height); } int CGRect_Convert(PyObject *v, CGRect *p_itself) { - if( !PyArg_Parse(v, "(ffff)", - &p_itself->origin.x, - &p_itself->origin.y, - &p_itself->size.width, - &p_itself->size.height) ) - return 0; - return 1; + if( !PyArg_Parse(v, "(ffff)", + &p_itself->origin.x, + &p_itself->origin.y, + &p_itself->size.width, + &p_itself->size.height) ) + return 0; + return 1; } PyObject *CGAffineTransform_New(CGAffineTransform *itself) { - return Py_BuildValue("(ffffff)", - itself->a, - itself->b, - itself->c, - itself->d, - itself->tx, - itself->ty); + return Py_BuildValue("(ffffff)", + itself->a, + itself->b, + itself->c, + itself->d, + itself->tx, + itself->ty); } int CGAffineTransform_Convert(PyObject *v, CGAffineTransform *p_itself) { - if( !PyArg_Parse(v, "(ffffff)", - &p_itself->a, - &p_itself->b, - &p_itself->c, - &p_itself->d, - &p_itself->tx, - &p_itself->ty) ) - return 0; - return 1; + if( !PyArg_Parse(v, "(ffffff)", + &p_itself->a, + &p_itself->b, + &p_itself->c, + &p_itself->d, + &p_itself->tx, + &p_itself->ty) ) + return 0; + return 1; } static PyObject *CG_Error; @@ -110,6 +110,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int CGContextRefObj_Convert(PyObject *v, CGContextRef *p_itself) { if (!CGContextRefObj_Check(v)) @@ -1191,16 +1192,16 @@ #define CGContextRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CGContextRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CGContextRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; CGContextRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, 
CGContextRefObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((CGContextRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CGContextRefObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((CGContextRefObject *)_self)->ob_itself = itself; + return _self; } #define CGContextRefObj_tp_free PyObject_Del @@ -1261,11 +1262,11 @@ OSStatus _err; if (!PyArg_ParseTuple(_args, "O&", GrafObj_Convert, &port)) - return NULL; + return NULL; _err = CreateCGContextForPort(port, &ctx); if (_err != noErr) - if (_err != noErr) return PyMac_Error(_err); + if (_err != noErr) return PyMac_Error(_err); _res = Py_BuildValue("O&", CGContextRefObj_New, ctx); return _res; From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/cm _Cmmodule.c, 1.9.2.2, 1.9.2.3 Message-ID: <20051016052435.259401E401A@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/cm In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/cm Modified Files: Tag: ast-branch _Cmmodule.c Log Message: Merge head to branch (for the last time) Index: _Cmmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/cm/_Cmmodule.c,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- _Cmmodule.c 7 Jan 2005 07:01:20 -0000 1.9.2.2 +++ _Cmmodule.c 16 Oct 2005 05:24:01 -0000 1.9.2.3 @@ -36,21 +36,21 @@ CmpDesc_New(ComponentDescription *itself) { - return Py_BuildValue("O&O&O&ll", - PyMac_BuildOSType, itself->componentType, - PyMac_BuildOSType, itself->componentSubType, - PyMac_BuildOSType, itself->componentManufacturer, - itself->componentFlags, 
itself->componentFlagsMask); + return Py_BuildValue("O&O&O&ll", + PyMac_BuildOSType, itself->componentType, + PyMac_BuildOSType, itself->componentSubType, + PyMac_BuildOSType, itself->componentManufacturer, + itself->componentFlags, itself->componentFlagsMask); } static int CmpDesc_Convert(PyObject *v, ComponentDescription *p_itself) { - return PyArg_ParseTuple(v, "O&O&O&ll", - PyMac_GetOSType, &p_itself->componentType, - PyMac_GetOSType, &p_itself->componentSubType, - PyMac_GetOSType, &p_itself->componentManufacturer, - &p_itself->componentFlags, &p_itself->componentFlagsMask); + return PyArg_ParseTuple(v, "O&O&O&ll", + PyMac_GetOSType, &p_itself->componentType, + PyMac_GetOSType, &p_itself->componentSubType, + PyMac_GetOSType, &p_itself->componentManufacturer, + &p_itself->componentFlags, &p_itself->componentFlagsMask); } @@ -71,14 +71,15 @@ { ComponentInstanceObject *it; if (itself == NULL) { - PyErr_SetString(Cm_Error,"NULL ComponentInstance"); - return NULL; - } + PyErr_SetString(Cm_Error,"NULL ComponentInstance"); + return NULL; + } it = PyObject_NEW(ComponentInstanceObject, &ComponentInstance_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int CmpInstObj_Convert(PyObject *v, ComponentInstance *p_itself) { if (!CmpInstObj_Check(v)) @@ -260,16 +261,16 @@ #define CmpInstObj_tp_alloc PyType_GenericAlloc -static PyObject *CmpInstObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CmpInstObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; ComponentInstance itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CmpInstObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((ComponentInstanceObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CmpInstObj_Convert, &itself)) return NULL; + if ((_self = 
type->tp_alloc(type, 0)) == NULL) return NULL; + ((ComponentInstanceObject *)_self)->ob_itself = itself; + return _self; } #define CmpInstObj_tp_free PyObject_Del @@ -337,21 +338,22 @@ { ComponentObject *it; if (itself == NULL) { - /* XXXX Or should we return None? */ - PyErr_SetString(Cm_Error,"No such component"); - return NULL; - } + /* XXXX Or should we return None? */ + PyErr_SetString(Cm_Error,"No such component"); + return NULL; + } it = PyObject_NEW(ComponentObject, &Component_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int CmpObj_Convert(PyObject *v, Component *p_itself) { if ( v == Py_None ) { - *p_itself = 0; - return 1; - } + *p_itself = 0; + return 1; + } if (!CmpObj_Check(v)) { PyErr_SetString(PyExc_TypeError, "Component required"); @@ -693,16 +695,16 @@ #define CmpObj_tp_alloc PyType_GenericAlloc -static PyObject *CmpObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CmpObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; Component itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CmpObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((ComponentObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CmpObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((ComponentObject *)_self)->ob_itself = itself; + return _self; } #define CmpObj_tp_free PyObject_Del @@ -913,10 +915,10 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(Component, CmpObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Component, CmpObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(ComponentInstance, CmpInstObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(ComponentInstance, CmpInstObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(Component, CmpObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Component, 
CmpObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(ComponentInstance, CmpInstObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(ComponentInstance, CmpInstObj_Convert); m = Py_InitModule("_Cm", Cm_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/carbonevt _CarbonEvtmodule.c, 1.9.2.2, 1.9.2.3 Message-ID: <20051016052435.1A8B81E4019@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/carbonevt In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/carbonevt Modified Files: Tag: ast-branch _CarbonEvtmodule.c Log Message: Merge head to branch (for the last time) Index: _CarbonEvtmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/carbonevt/_CarbonEvtmodule.c,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- _CarbonEvtmodule.c 7 Jan 2005 07:01:04 -0000 1.9.2.2 +++ _CarbonEvtmodule.c 16 Oct 2005 05:24:01 -0000 1.9.2.3 @@ -30,17 +30,17 @@ static PyObject* EventTypeSpec_New(EventTypeSpec *in) { - return Py_BuildValue("ll", in->eventClass, in->eventKind); + return Py_BuildValue("ll", in->eventClass, in->eventKind); } static int EventTypeSpec_Convert(PyObject *v, EventTypeSpec *out) { - if (PyArg_Parse(v, "(O&l)", - PyMac_GetOSType, &(out->eventClass), - &(out->eventKind))) - return 1; - return NULL; + if (PyArg_Parse(v, "(O&l)", + PyMac_GetOSType, &(out->eventClass), + &(out->eventKind))) + return 1; + return NULL; } /********** end EventTypeSpec *******/ @@ -51,15 +51,15 @@ static PyObject* HIPoint_New(HIPoint *in) { - return Py_BuildValue("ff", in->x, in->y); + return Py_BuildValue("ff", in->x, in->y); } static int HIPoint_Convert(PyObject *v, HIPoint *out) { - if (PyArg_ParseTuple(v, "ff", &(out->x), &(out->y))) - return 1; - return 
NULL; + if (PyArg_ParseTuple(v, "ff", &(out->x), &(out->y))) + return 1; + return NULL; } #endif @@ -70,15 +70,15 @@ static PyObject* EventHotKeyID_New(EventHotKeyID *in) { - return Py_BuildValue("ll", in->signature, in->id); + return Py_BuildValue("ll", in->signature, in->id); } static int EventHotKeyID_Convert(PyObject *v, EventHotKeyID *out) { - if (PyArg_ParseTuple(v, "ll", &out->signature, &out->id)) - return 1; - return NULL; + if (PyArg_ParseTuple(v, "ll", &out->signature, &out->id)) + return 1; + return NULL; } /********** end EventHotKeyID *******/ @@ -89,27 +89,27 @@ static pascal OSStatus myEventHandler(EventHandlerCallRef handlerRef, EventRef event, void *outPyObject) { - PyObject *retValue; - int status; + PyObject *retValue; + int status; - retValue = PyObject_CallFunction((PyObject *)outPyObject, "O&O&", - EventHandlerCallRef_New, handlerRef, - EventRef_New, event); - if (retValue == NULL) { - PySys_WriteStderr("Error in event handler callback:\n"); - PyErr_Print(); /* this also clears the error */ - status = noErr; /* complain? how? */ - } else { - if (retValue == Py_None) - status = noErr; - else if (PyInt_Check(retValue)) { - status = PyInt_AsLong(retValue); - } else - status = noErr; /* wrong object type, complain? */ - Py_DECREF(retValue); - } + retValue = PyObject_CallFunction((PyObject *)outPyObject, "O&O&", + EventHandlerCallRef_New, handlerRef, + EventRef_New, event); + if (retValue == NULL) { + PySys_WriteStderr("Error in event handler callback:\n"); + PyErr_Print(); /* this also clears the error */ + status = noErr; /* complain? how? */ + } else { + if (retValue == Py_None) + status = noErr; + else if (PyInt_Check(retValue)) { + status = PyInt_AsLong(retValue); + } else + status = noErr; /* wrong object type, complain? 
*/ + Py_DECREF(retValue); + } - return status; + return status; } /******** end myEventHandler ***********/ @@ -136,6 +136,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int EventRef_Convert(PyObject *v, EventRef *p_itself) { if (!EventRef_Check(v)) @@ -399,16 +400,16 @@ #define EventRef_tp_alloc PyType_GenericAlloc -static PyObject *EventRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventRef_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventRef_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((EventRefObject *)_self)->ob_itself = itself; + return _self; } #define EventRef_tp_free PyObject_Del @@ -480,6 +481,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int EventQueueRef_Convert(PyObject *v, EventQueueRef *p_itself) { if (!EventQueueRef_Check(v)) @@ -619,16 +621,16 @@ #define EventQueueRef_tp_alloc PyType_GenericAlloc -static PyObject *EventQueueRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventQueueRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventQueueRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventQueueRef_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventQueueRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventQueueRef_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + 
((EventQueueRefObject *)_self)->ob_itself = itself; + return _self; } #define EventQueueRef_tp_free PyObject_Del @@ -700,6 +702,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int EventLoopRef_Convert(PyObject *v, EventLoopRef *p_itself) { if (!EventLoopRef_Check(v)) @@ -748,16 +751,16 @@ #define EventLoopRef_tp_alloc PyType_GenericAlloc -static PyObject *EventLoopRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventLoopRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventLoopRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventLoopRef_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventLoopRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventLoopRef_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((EventLoopRefObject *)_self)->ob_itself = itself; + return _self; } #define EventLoopRef_tp_free PyObject_Del @@ -829,6 +832,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int EventLoopTimerRef_Convert(PyObject *v, EventLoopTimerRef *p_itself) { if (!EventLoopTimerRef_Check(v)) @@ -895,16 +899,16 @@ #define EventLoopTimerRef_tp_alloc PyType_GenericAlloc -static PyObject *EventLoopTimerRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventLoopTimerRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventLoopTimerRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventLoopTimerRef_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventLoopTimerRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventLoopTimerRef_Convert, 
&itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((EventLoopTimerRefObject *)_self)->ob_itself = itself; + return _self; } #define EventLoopTimerRef_tp_free PyObject_Del @@ -978,6 +982,7 @@ it->ob_callback = NULL; return (PyObject *)it; } + int EventHandlerRef_Convert(PyObject *v, EventHandlerRef *p_itself) { if (!EventHandlerRef_Check(v)) @@ -1050,11 +1055,11 @@ OSStatus _err; if (_self->ob_itself == NULL) { - PyErr_SetString(CarbonEvents_Error, "Handler has been removed"); - return NULL; + PyErr_SetString(CarbonEvents_Error, "Handler has been removed"); + return NULL; } if (!PyArg_ParseTuple(_args, "")) - return NULL; + return NULL; _err = RemoveEventHandler(_self->ob_itself); if (_err != noErr) return PyMac_Error(_err); _self->ob_itself = NULL; @@ -1087,16 +1092,16 @@ #define EventHandlerRef_tp_alloc PyType_GenericAlloc -static PyObject *EventHandlerRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventHandlerRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventHandlerRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventHandlerRef_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventHandlerRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventHandlerRef_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((EventHandlerRefObject *)_self)->ob_itself = itself; + return _self; } #define EventHandlerRef_tp_free PyObject_Del @@ -1168,6 +1173,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int EventHandlerCallRef_Convert(PyObject *v, EventHandlerCallRef *p_itself) { if (!EventHandlerCallRef_Check(v)) @@ -1219,16 +1225,16 @@ #define EventHandlerCallRef_tp_alloc PyType_GenericAlloc -static PyObject 
*EventHandlerCallRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventHandlerCallRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventHandlerCallRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventHandlerCallRef_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventHandlerCallRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventHandlerCallRef_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((EventHandlerCallRefObject *)_self)->ob_itself = itself; + return _self; } #define EventHandlerCallRef_tp_free PyObject_Del @@ -1300,6 +1306,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int EventTargetRef_Convert(PyObject *v, EventTargetRef *p_itself) { if (!EventTargetRef_Check(v)) @@ -1340,15 +1347,15 @@ OSStatus _err; if (!PyArg_ParseTuple(_args, "O&O", EventTypeSpec_Convert, &inSpec, &callback)) - return NULL; + return NULL; _err = InstallEventHandler(_self->ob_itself, myEventHandlerUPP, 1, &inSpec, (void *)callback, &outRef); if (_err != noErr) return PyMac_Error(_err); _res = EventHandlerRef_New(outRef); if (_res != NULL) { - ((EventHandlerRefObject*)_res)->ob_callback = callback; - Py_INCREF(callback); + ((EventHandlerRefObject*)_res)->ob_callback = callback; + Py_INCREF(callback); } return _res; } @@ -1373,16 +1380,16 @@ #define EventTargetRef_tp_alloc PyType_GenericAlloc -static PyObject *EventTargetRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventTargetRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventTargetRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventTargetRef_Convert, &itself)) return NULL; - if ((self = 
type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventTargetRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventTargetRef_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((EventTargetRefObject *)_self)->ob_itself = itself; + return _self; } #define EventTargetRef_tp_free PyObject_Del @@ -1454,6 +1461,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int EventHotKeyRef_Convert(PyObject *v, EventHotKeyRef *p_itself) { if (!EventHotKeyRef_Check(v)) @@ -1502,16 +1510,16 @@ #define EventHotKeyRef_tp_alloc PyType_GenericAlloc -static PyObject *EventHotKeyRef_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *EventHotKeyRef_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; EventHotKeyRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, EventHotKeyRef_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((EventHotKeyRefObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, EventHotKeyRef_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((EventHotKeyRefObject *)_self)->ob_itself = itself; + return _self; } #define EventHotKeyRef_tp_free PyObject_Del From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/evt _Evtmodule.c, 1.6.2.2, 1.6.2.3 Message-ID: <20051016052435.5B2841E4010@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/evt In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/evt Modified Files: Tag: ast-branch _Evtmodule.c Log Message: Merge head to branch (for the last 
time) Index: _Evtmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/evt/_Evtmodule.c,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- _Evtmodule.c 7 Jan 2005 07:01:23 -0000 1.6.2.2 +++ _Evtmodule.c 16 Oct 2005 05:24:02 -0000 1.6.2.3 @@ -455,7 +455,7 @@ &eventMask, &sleep, OptResObj_Convert, &mouseregion)) - return NULL; + return NULL; _rv = WaitNextEvent(eventMask, &theEvent, sleep, From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/dlg _Dlgmodule.c, 1.10.2.2, 1.10.2.3 Message-ID: <20051016052435.5D3F51E401C@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/dlg In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/dlg Modified Files: Tag: ast-branch _Dlgmodule.c Log Message: Merge head to branch (for the last time) Index: _Dlgmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/dlg/_Dlgmodule.c,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- _Dlgmodule.c 7 Jan 2005 07:01:22 -0000 1.10.2.2 +++ _Dlgmodule.c 16 Oct 2005 05:24:02 -0000 1.10.2.3 @@ -34,55 +34,55 @@ EventRecord *event, short *itemHit) { - Boolean rv; - PyObject *args, *res; - PyObject *callback = Dlg_FilterProc_callback; - if (callback == NULL) - return 0; /* Default behavior */ - Dlg_FilterProc_callback = NULL; /* We'll restore it when call successful */ - args = Py_BuildValue("O&O&", DlgObj_WhichDialog, dialog, PyMac_BuildEventRecord, event); - if (args == NULL) - res = NULL; - else { - res = PyEval_CallObject(callback, args); - Py_DECREF(args); - } - if (res == NULL) { - PySys_WriteStderr("Exception in Dialog Filter\n"); - PyErr_Print(); 
- *itemHit = -1; /* Fake return item */ - return 1; /* We handled it */ - } - else { - Dlg_FilterProc_callback = callback; - if (PyInt_Check(res)) { - *itemHit = PyInt_AsLong(res); - rv = 1; - } - else - rv = PyObject_IsTrue(res); - } - Py_DECREF(res); - return rv; + Boolean rv; + PyObject *args, *res; + PyObject *callback = Dlg_FilterProc_callback; + if (callback == NULL) + return 0; /* Default behavior */ + Dlg_FilterProc_callback = NULL; /* We'll restore it when call successful */ + args = Py_BuildValue("O&O&", DlgObj_WhichDialog, dialog, PyMac_BuildEventRecord, event); + if (args == NULL) + res = NULL; + else { + res = PyEval_CallObject(callback, args); + Py_DECREF(args); + } + if (res == NULL) { + PySys_WriteStderr("Exception in Dialog Filter\n"); + PyErr_Print(); + *itemHit = -1; /* Fake return item */ + return 1; /* We handled it */ + } + else { + Dlg_FilterProc_callback = callback; + if (PyInt_Check(res)) { + *itemHit = PyInt_AsLong(res); + rv = 1; + } + else + rv = PyObject_IsTrue(res); + } + Py_DECREF(res); + return rv; } static ModalFilterUPP Dlg_PassFilterProc(PyObject *callback) { - PyObject *tmp = Dlg_FilterProc_callback; - static ModalFilterUPP UnivFilterUpp = NULL; - - Dlg_FilterProc_callback = NULL; - if (callback == Py_None) { - Py_XDECREF(tmp); - return NULL; - } - Py_INCREF(callback); - Dlg_FilterProc_callback = callback; - Py_XDECREF(tmp); - if ( UnivFilterUpp == NULL ) - UnivFilterUpp = NewModalFilterUPP(&Dlg_UnivFilterProc); - return UnivFilterUpp; + PyObject *tmp = Dlg_FilterProc_callback; + static ModalFilterUPP UnivFilterUpp = NULL; + + Dlg_FilterProc_callback = NULL; + if (callback == Py_None) { + Py_XDECREF(tmp); + return NULL; + } + Py_INCREF(callback); + Dlg_FilterProc_callback = callback; + Py_XDECREF(tmp); + if ( UnivFilterUpp == NULL ) + UnivFilterUpp = NewModalFilterUPP(&Dlg_UnivFilterProc); + return UnivFilterUpp; } static PyObject *Dlg_UserItemProc_callback = NULL; @@ -90,24 +90,24 @@ static pascal void 
Dlg_UnivUserItemProc(DialogPtr dialog, short item) { - PyObject *args, *res; + PyObject *args, *res; - if (Dlg_UserItemProc_callback == NULL) - return; /* Default behavior */ - Dlg_FilterProc_callback = NULL; /* We'll restore it when call successful */ - args = Py_BuildValue("O&h", DlgObj_WhichDialog, dialog, item); - if (args == NULL) - res = NULL; - else { - res = PyEval_CallObject(Dlg_UserItemProc_callback, args); - Py_DECREF(args); - } - if (res == NULL) { - PySys_WriteStderr("Exception in Dialog UserItem proc\n"); - PyErr_Print(); - } - Py_XDECREF(res); - return; + if (Dlg_UserItemProc_callback == NULL) + return; /* Default behavior */ + Dlg_FilterProc_callback = NULL; /* We'll restore it when call successful */ + args = Py_BuildValue("O&h", DlgObj_WhichDialog, dialog, item); + if (args == NULL) + res = NULL; + else { + res = PyEval_CallObject(Dlg_UserItemProc_callback, args); + Py_DECREF(args); + } + if (res == NULL) { + PySys_WriteStderr("Exception in Dialog UserItem proc\n"); + PyErr_Print(); + } + Py_XDECREF(res); + return; } #if 0 @@ -146,6 +146,7 @@ SetWRefCon(GetDialogWindow(itself), (long)it); return (PyObject *)it; } + int DlgObj_Convert(PyObject *v, DialogPtr *p_itself) { if (v == Py_None) { *p_itself = NULL; return 1; } @@ -958,16 +959,16 @@ #define DlgObj_tp_alloc PyType_GenericAlloc -static PyObject *DlgObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *DlgObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; DialogPtr itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, DlgObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((DialogObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, DlgObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((DialogObject *)_self)->ob_itself = 
itself; + return _self; } #define DlgObj_tp_free PyObject_Del @@ -1452,28 +1453,28 @@ { PyObject *_res = NULL; - PyObject *new = NULL; - - - if (!PyArg_ParseTuple(_args, "|O", &new)) - return NULL; + PyObject *new = NULL; - if (Dlg_UserItemProc_callback && new && new != Py_None) { - PyErr_SetString(Dlg_Error, "Another UserItemProc is already installed"); - return NULL; - } - - if (new == NULL || new == Py_None) { - new = NULL; - _res = Py_None; - Py_INCREF(Py_None); - } else { - Py_INCREF(new); - _res = Py_BuildValue("O&", ResObj_New, (Handle)NewUserItemUPP(Dlg_UnivUserItemProc)); - } - - Dlg_UserItemProc_callback = new; - return _res; + + if (!PyArg_ParseTuple(_args, "|O", &new)) + return NULL; + + if (Dlg_UserItemProc_callback && new && new != Py_None) { + PyErr_SetString(Dlg_Error, "Another UserItemProc is already installed"); + return NULL; + } + + if (new == NULL || new == Py_None) { + new = NULL; + _res = Py_None; + Py_INCREF(Py_None); + } else { + Py_INCREF(new); + _res = Py_BuildValue("O&", ResObj_New, (Handle)NewUserItemUPP(Dlg_UnivUserItemProc)); + } + + Dlg_UserItemProc_callback = new; + return _res; } @@ -1528,9 +1529,9 @@ WindowPtr DlgObj_ConvertToWindow(PyObject *self) { - if ( DlgObj_Check(self) ) - return GetDialogWindow(((DialogObject *)self)->ob_itself); - return NULL; + if ( DlgObj_Check(self) ) + return GetDialogWindow(((DialogObject *)self)->ob_itself); + return NULL; } #endif /* Return the object corresponding to the dialog, or None */ @@ -1538,29 +1539,29 @@ PyObject * DlgObj_WhichDialog(DialogPtr d) { - PyObject *it; - - if (d == NULL) { - it = Py_None; - Py_INCREF(it); - } else { - WindowPtr w = GetDialogWindow(d); - - it = (PyObject *) GetWRefCon(w); - if (it == NULL || ((DialogObject *)it)->ob_itself != d || !DlgObj_Check(it)) { + PyObject *it; + + if (d == NULL) { + it = Py_None; + Py_INCREF(it); + } else { + WindowPtr w = GetDialogWindow(d); + + it = (PyObject *) GetWRefCon(w); + if (it == NULL || ((DialogObject *)it)->ob_itself != d || 
!DlgObj_Check(it)) { #if 0 - /* Should do this, but we don't have an ob_freeit for dialogs yet. */ - it = WinObj_New(w); - ((WindowObject *)it)->ob_freeit = NULL; + /* Should do this, but we don't have an ob_freeit for dialogs yet. */ + it = WinObj_New(w); + ((WindowObject *)it)->ob_freeit = NULL; #else - it = Py_None; - Py_INCREF(it); + it = Py_None; + Py_INCREF(it); #endif - } else { - Py_INCREF(it); - } - } - return it; + } else { + Py_INCREF(it); + } + } + return it; } @@ -1571,9 +1572,9 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(DialogPtr, DlgObj_New); - PyMac_INIT_TOOLBOX_OBJECT_NEW(DialogPtr, DlgObj_WhichDialog); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(DialogPtr, DlgObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(DialogPtr, DlgObj_New); + PyMac_INIT_TOOLBOX_OBJECT_NEW(DialogPtr, DlgObj_WhichDialog); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(DialogPtr, DlgObj_Convert); m = Py_InitModule("_Dlg", Dlg_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/drag _Dragmodule.c, 1.9.2.2, 1.9.2.3 Message-ID: <20051016052435.7F1A21E4015@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/drag In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/drag Modified Files: Tag: ast-branch _Dragmodule.c Log Message: Merge head to branch (for the last time) Index: _Dragmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/drag/_Dragmodule.c,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- _Dragmodule.c 7 Jan 2005 07:01:23 -0000 1.9.2.2 +++ _Dragmodule.c 16 Oct 2005 05:24:02 -0000 1.9.2.3 @@ -52,15 +52,16 @@ { DragObjObject *it; if (itself == NULL) { - PyErr_SetString(Drag_Error,"Cannot create null Drag"); - return NULL; - } + 
PyErr_SetString(Drag_Error,"Cannot create null Drag"); + return NULL; + } it = PyObject_NEW(DragObjObject, &DragObj_Type); if (it == NULL) return NULL; it->ob_itself = itself; it->sendproc = NULL; return (PyObject *)it; } + int DragObj_Convert(PyObject *v, DragRef *p_itself) { if (!DragObj_Check(v)) @@ -743,16 +744,16 @@ #define DragObj_tp_alloc PyType_GenericAlloc -static PyObject *DragObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *DragObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; DragRef itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, DragObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((DragObjObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, DragObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((DragObjObject *)_self)->ob_itself = itself; + return _self; } #define DragObj_tp_free PyObject_Del @@ -920,15 +921,15 @@ PyObject *callback; WindowPtr theWindow = NULL; OSErr _err; - + if ( !PyArg_ParseTuple(_args, "O|O&", &callback, WinObj_Convert, &theWindow) ) - return NULL; - Py_INCREF(callback); /* Cannot decref later, too bad */ + return NULL; + Py_INCREF(callback); /* Cannot decref later, too bad */ _err = InstallTrackingHandler(dragglue_TrackingHandlerUPP, theWindow, (void *)callback); - if (_err != noErr) return PyMac_Error(_err); - Py_INCREF(Py_None); - _res = Py_None; - return _res; + if (_err != noErr) return PyMac_Error(_err); + Py_INCREF(Py_None); + _res = Py_None; + return _res; } @@ -939,15 +940,15 @@ PyObject *callback; WindowPtr theWindow = NULL; OSErr _err; - + if ( !PyArg_ParseTuple(_args, "O|O&", &callback, WinObj_Convert, &theWindow) ) - return NULL; - Py_INCREF(callback); /* Cannot decref later, too bad */ + return NULL; + Py_INCREF(callback); /* Cannot 
decref later, too bad */ _err = InstallReceiveHandler(dragglue_ReceiveHandlerUPP, theWindow, (void *)callback); - if (_err != noErr) return PyMac_Error(_err); - Py_INCREF(Py_None); - _res = Py_None; - return _res; + if (_err != noErr) return PyMac_Error(_err); + Py_INCREF(Py_None); + _res = Py_None; + return _res; } @@ -957,14 +958,14 @@ WindowPtr theWindow = NULL; OSErr _err; - + if ( !PyArg_ParseTuple(_args, "|O&", WinObj_Convert, &theWindow) ) - return NULL; + return NULL; _err = RemoveTrackingHandler(dragglue_TrackingHandlerUPP, theWindow); - if (_err != noErr) return PyMac_Error(_err); - Py_INCREF(Py_None); - _res = Py_None; - return _res; + if (_err != noErr) return PyMac_Error(_err); + Py_INCREF(Py_None); + _res = Py_None; + return _res; } @@ -974,14 +975,14 @@ WindowPtr theWindow = NULL; OSErr _err; - + if ( !PyArg_ParseTuple(_args, "|O&", WinObj_Convert, &theWindow) ) - return NULL; + return NULL; _err = RemoveReceiveHandler(dragglue_ReceiveHandlerUPP, theWindow); - if (_err != noErr) return PyMac_Error(_err); - Py_INCREF(Py_None); - _res = Py_None; - return _res; + if (_err != noErr) return PyMac_Error(_err); + Py_INCREF(Py_None); + _res = Py_None; + return _res; } @@ -1013,81 +1014,81 @@ dragglue_TrackingHandler(DragTrackingMessage theMessage, WindowPtr theWindow, void *handlerRefCon, DragReference theDrag) { - PyObject *args, *rv; - int i; - - args = Py_BuildValue("hO&O&", theMessage, DragObj_New, theDrag, WinObj_WhichWindow, theWindow); - if ( args == NULL ) - return -1; - rv = PyEval_CallObject((PyObject *)handlerRefCon, args); - Py_DECREF(args); - if ( rv == NULL ) { - PySys_WriteStderr("Drag: Exception in TrackingHandler\n"); - PyErr_Print(); - return -1; - } - i = -1; - if ( rv == Py_None ) - i = 0; - else - PyArg_Parse(rv, "l", &i); - Py_DECREF(rv); - return i; + PyObject *args, *rv; + int i; + + args = Py_BuildValue("hO&O&", theMessage, DragObj_New, theDrag, WinObj_WhichWindow, theWindow); + if ( args == NULL ) + return -1; + rv = 
PyEval_CallObject((PyObject *)handlerRefCon, args); + Py_DECREF(args); + if ( rv == NULL ) { + PySys_WriteStderr("Drag: Exception in TrackingHandler\n"); + PyErr_Print(); + return -1; + } + i = -1; + if ( rv == Py_None ) + i = 0; + else + PyArg_Parse(rv, "l", &i); + Py_DECREF(rv); + return i; } static pascal OSErr dragglue_ReceiveHandler(WindowPtr theWindow, void *handlerRefCon, DragReference theDrag) { - PyObject *args, *rv; - int i; - - args = Py_BuildValue("O&O&", DragObj_New, theDrag, WinObj_WhichWindow, theWindow); - if ( args == NULL ) - return -1; - rv = PyEval_CallObject((PyObject *)handlerRefCon, args); - Py_DECREF(args); - if ( rv == NULL ) { - PySys_WriteStderr("Drag: Exception in ReceiveHandler\n"); - PyErr_Print(); - return -1; - } - i = -1; - if ( rv == Py_None ) - i = 0; - else - PyArg_Parse(rv, "l", &i); - Py_DECREF(rv); - return i; + PyObject *args, *rv; + int i; + + args = Py_BuildValue("O&O&", DragObj_New, theDrag, WinObj_WhichWindow, theWindow); + if ( args == NULL ) + return -1; + rv = PyEval_CallObject((PyObject *)handlerRefCon, args); + Py_DECREF(args); + if ( rv == NULL ) { + PySys_WriteStderr("Drag: Exception in ReceiveHandler\n"); + PyErr_Print(); + return -1; + } + i = -1; + if ( rv == Py_None ) + i = 0; + else + PyArg_Parse(rv, "l", &i); + Py_DECREF(rv); + return i; } static pascal OSErr dragglue_SendData(FlavorType theType, void *dragSendRefCon, ItemReference theItem, DragReference theDrag) { - DragObjObject *self = (DragObjObject *)dragSendRefCon; - PyObject *args, *rv; - int i; - - if ( self->sendproc == NULL ) - return -1; - args = Py_BuildValue("O&l", PyMac_BuildOSType, theType, theItem); - if ( args == NULL ) - return -1; - rv = PyEval_CallObject(self->sendproc, args); - Py_DECREF(args); - if ( rv == NULL ) { - PySys_WriteStderr("Drag: Exception in SendDataHandler\n"); - PyErr_Print(); - return -1; - } - i = -1; - if ( rv == Py_None ) - i = 0; - else - PyArg_Parse(rv, "l", &i); - Py_DECREF(rv); - return i; + DragObjObject *self = 
(DragObjObject *)dragSendRefCon; + PyObject *args, *rv; + int i; + + if ( self->sendproc == NULL ) + return -1; + args = Py_BuildValue("O&l", PyMac_BuildOSType, theType, theItem); + if ( args == NULL ) + return -1; + rv = PyEval_CallObject(self->sendproc, args); + Py_DECREF(args); + if ( rv == NULL ) { + PySys_WriteStderr("Drag: Exception in SendDataHandler\n"); + PyErr_Print(); + return -1; + } + i = -1; + if ( rv == Py_None ) + i = 0; + else + PyArg_Parse(rv, "l", &i); + Py_DECREF(rv); + return i; } #if 0 @@ -1115,8 +1116,8 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(DragRef, DragObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(DragRef, DragObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(DragRef, DragObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(DragRef, DragObj_Convert); m = Py_InitModule("_Drag", Drag_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/fm _Fmmodule.c, 1.6.2.2, 1.6.2.3 Message-ID: <20051016052435.6B7621E401D@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/fm In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/fm Modified Files: Tag: ast-branch _Fmmodule.c Log Message: Merge head to branch (for the last time) Index: _Fmmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/fm/_Fmmodule.c,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- _Fmmodule.c 7 Jan 2005 07:01:24 -0000 1.6.2.2 +++ _Fmmodule.c 16 Oct 2005 05:24:02 -0000 1.6.2.3 @@ -25,12 +25,12 @@ FMRec_New(FMetricRec *itself) { - return Py_BuildValue("O&O&O&O&O&", - PyMac_BuildFixed, itself->ascent, - PyMac_BuildFixed, itself->descent, - PyMac_BuildFixed, itself->leading, - PyMac_BuildFixed, itself->widMax, - ResObj_New, itself->wTabHandle); + return 
Py_BuildValue("O&O&O&O&O&", + PyMac_BuildFixed, itself->ascent, + PyMac_BuildFixed, itself->descent, + PyMac_BuildFixed, itself->leading, + PyMac_BuildFixed, itself->widMax, + ResObj_New, itself->wTabHandle); } #if 0 @@ -38,12 +38,12 @@ static int FMRec_Convert(PyObject *v, FMetricRec *p_itself) { - return PyArg_ParseTuple(v, "O&O&O&O&O&", - PyMac_GetFixed, &itself->ascent, - PyMac_GetFixed, &itself->descent, - PyMac_GetFixed, &itself->leading, - PyMac_GetFixed, &itself->widMax, - ResObj_Convert, &itself->wTabHandle); + return PyArg_ParseTuple(v, "O&O&O&O&O&", + PyMac_GetFixed, &itself->ascent, + PyMac_GetFixed, &itself->descent, + PyMac_GetFixed, &itself->leading, + PyMac_GetFixed, &itself->widMax, + ResObj_Convert, &itself->wTabHandle); } #endif From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/launch _Launchmodule.c, 1.4.6.1, 1.4.6.2 Message-ID: <20051016052435.86EBA1E400D@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/launch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/launch Modified Files: Tag: ast-branch _Launchmodule.c Log Message: Merge head to branch (for the last time) Index: _Launchmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/launch/_Launchmodule.c,v retrieving revision 1.4.6.1 retrieving revision 1.4.6.2 diff -u -d -r1.4.6.1 -r1.4.6.2 --- _Launchmodule.c 7 Jan 2005 07:01:27 -0000 1.4.6.1 +++ _Launchmodule.c 16 Oct 2005 05:24:02 -0000 1.4.6.2 @@ -27,21 +27,21 @@ static int OptCFStringRefObj_Convert(PyObject *v, CFStringRef *spec) { - if (v == Py_None) { - *spec = NULL; - return 1; - } - return CFStringRefObj_Convert(v, spec); + if (v == Py_None) { + *spec = NULL; + return 1; + } + return CFStringRefObj_Convert(v, spec); } PyObject * 
OptCFStringRefObj_New(CFStringRef it) { - if (it == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - return CFStringRefObj_New(it); + if (it == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + return CFStringRefObj_New(it); } /* @@ -50,13 +50,13 @@ PyObject * LSItemInfoRecord_New(LSItemInfoRecord *it) { - return Py_BuildValue("{s:is:O&s:O&s:O&s:O&s:i}", - "flags", it->flags, - "filetype", PyMac_BuildOSType, it->filetype, - "creator", PyMac_BuildOSType, it->creator, - "extension", OptCFStringRefObj_New, it->extension, - "iconFileName", OptCFStringRefObj_New, it->iconFileName, - "kindID", it->kindID); + return Py_BuildValue("{s:is:O&s:O&s:O&s:O&s:i}", + "flags", it->flags, + "filetype", PyMac_BuildOSType, it->filetype, + "creator", PyMac_BuildOSType, it->creator, + "extension", OptCFStringRefObj_New, it->extension, + "iconFileName", OptCFStringRefObj_New, it->iconFileName, + "kindID", it->kindID); } static PyObject *Launch_Error; From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/cf _CFmodule.c, 1.15.2.2, 1.15.2.3 cfsupport.py, 1.18.2.2, 1.18.2.3 Message-ID: <20051016052435.8611C1E401E@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/cf In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/cf Modified Files: Tag: ast-branch _CFmodule.c cfsupport.py Log Message: Merge head to branch (for the last time) Index: _CFmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/cf/_CFmodule.c,v retrieving revision 1.15.2.2 retrieving revision 1.15.2.3 diff -u -d -r1.15.2.2 -r1.15.2.3 --- _CFmodule.c 7 Jan 2005 07:01:20 -0000 1.15.2.2 +++ _CFmodule.c 16 Oct 2005 05:24:01 -0000 1.15.2.3 @@ -84,19 +84,19 @@ PyObject *CFRange_New(CFRange *itself) { - return Py_BuildValue("ll", 
(long)itself->location, (long)itself->length); + return Py_BuildValue("ll", (long)itself->location, (long)itself->length); } int CFRange_Convert(PyObject *v, CFRange *p_itself) { - long location, length; - - if( !PyArg_ParseTuple(v, "ll", &location, &length) ) - return 0; - p_itself->location = (CFIndex)location; - p_itself->length = (CFIndex)length; - return 1; + long location, length; + + if( !PyArg_ParseTuple(v, "ll", &location, &length) ) + return 0; + p_itself->location = (CFIndex)location; + p_itself->length = (CFIndex)length; + return 1; } /* Optional CFURL argument or None (passed as NULL) */ @@ -104,8 +104,8 @@ OptionalCFURLRefObj_Convert(PyObject *v, CFURLRef *p_itself) { if ( v == Py_None ) { - p_itself = NULL; - return 1; + p_itself = NULL; + return 1; } return CFURLRefObj_Convert(v, p_itself); } @@ -138,6 +138,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFTypeRefObj_Convert(PyObject *v, CFTypeRef *p_itself) { @@ -322,16 +323,16 @@ CFStringRef errorString; if (!PyArg_ParseTuple(_args, "l", &mutabilityOption)) - return NULL; + return NULL; _rv = CFPropertyListCreateFromXMLData((CFAllocatorRef)NULL, _self->ob_itself, mutabilityOption, &errorString); if (errorString) - CFRelease(errorString); + CFRelease(errorString); if (_rv == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Parse error in XML data"); - return NULL; + PyErr_SetString(PyExc_RuntimeError, "Parse error in XML data"); + return NULL; } _res = Py_BuildValue("O&", CFTypeRefObj_New, _rv); @@ -399,14 +400,14 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFTypeRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFTypeRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFTypeRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFTypeRefObject *)self)->ob_itself = itself; + ((CFTypeRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -414,7 +415,7 @@ #define CFTypeRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFTypeRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFTypeRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -500,6 +501,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFArrayRefObj_Convert(PyObject *v, CFArrayRef *p_itself) { @@ -602,21 +604,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFArrayRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFArrayRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFArrayRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFArrayRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFArrayRefObj_Convert, &itself)) { - ((CFArrayRefObject *)self)->ob_itself = itself; + ((CFArrayRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFArrayRefObject *)self)->ob_itself = itself; + ((CFArrayRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -624,7 +626,7 @@ #define CFArrayRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFArrayRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFArrayRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -710,6 +712,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFMutableArrayRefObj_Convert(PyObject *v, CFMutableArrayRef *p_itself) { @@ -841,21 +844,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFMutableArrayRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFMutableArrayRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFMutableArrayRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFMutableArrayRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFMutableArrayRefObj_Convert, &itself)) { - ((CFMutableArrayRefObject *)self)->ob_itself = itself; + ((CFMutableArrayRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFMutableArrayRefObject *)self)->ob_itself = itself; + ((CFMutableArrayRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -863,7 +866,7 @@ #define CFMutableArrayRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFMutableArrayRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFMutableArrayRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -949,6 +952,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFDictionaryRefObj_Convert(PyObject *v, CFDictionaryRef *p_itself) { @@ -1033,21 +1037,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFDictionaryRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFDictionaryRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFDictionaryRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFDictionaryRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFDictionaryRefObj_Convert, &itself)) { - ((CFDictionaryRefObject *)self)->ob_itself = itself; + ((CFDictionaryRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFDictionaryRefObject *)self)->ob_itself = itself; + ((CFDictionaryRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -1055,7 +1059,7 @@ #define CFDictionaryRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFDictionaryRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFDictionaryRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -1141,6 +1145,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFMutableDictionaryRefObj_Convert(PyObject *v, CFMutableDictionaryRef *p_itself) { @@ -1209,21 +1214,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFMutableDictionaryRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFMutableDictionaryRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFMutableDictionaryRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFMutableDictionaryRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFMutableDictionaryRefObj_Convert, &itself)) { - ((CFMutableDictionaryRefObject *)self)->ob_itself = itself; + ((CFMutableDictionaryRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFMutableDictionaryRefObject *)self)->ob_itself = itself; + ((CFMutableDictionaryRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -1231,7 +1236,7 @@ #define CFMutableDictionaryRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFMutableDictionaryRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFMutableDictionaryRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -1317,6 +1322,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFDataRefObj_Convert(PyObject *v, CFDataRef *p_itself) { @@ -1439,21 +1445,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFDataRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFDataRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFDataRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFDataRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFDataRefObj_Convert, &itself)) { - ((CFDataRefObject *)self)->ob_itself = itself; + ((CFDataRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFDataRefObject *)self)->ob_itself = itself; + ((CFDataRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -1461,7 +1467,7 @@ #define CFDataRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFDataRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFDataRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -1547,6 +1553,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFMutableDataRefObj_Convert(PyObject *v, CFMutableDataRef *p_itself) { @@ -1703,21 +1710,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFMutableDataRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFMutableDataRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFMutableDataRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFMutableDataRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFMutableDataRefObj_Convert, &itself)) { - ((CFMutableDataRefObject *)self)->ob_itself = itself; + ((CFMutableDataRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFMutableDataRefObject *)self)->ob_itself = itself; + ((CFMutableDataRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -1725,7 +1732,7 @@ #define CFMutableDataRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFMutableDataRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFMutableDataRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -1811,6 +1818,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFStringRefObj_Convert(PyObject *v, CFStringRef *p_itself) { @@ -1818,19 +1826,19 @@ if (PyString_Check(v)) { char *cStr; if (!PyArg_Parse(v, "es", "ascii", &cStr)) - return NULL; - *p_itself = CFStringCreateWithCString((CFAllocatorRef)NULL, cStr, kCFStringEncodingASCII); - return 1; + return NULL; + *p_itself = CFStringCreateWithCString((CFAllocatorRef)NULL, cStr, kCFStringEncodingASCII); + return 1; } if (PyUnicode_Check(v)) { - /* We use the CF types here, if Python was configured differently that will give an error */ - CFIndex size = PyUnicode_GetSize(v); - UniChar *unichars = 
PyUnicode_AsUnicode(v); - if (!unichars) return 0; - *p_itself = CFStringCreateWithCharacters((CFAllocatorRef)NULL, unichars, size); - return 1; + /* We use the CF types here, if Python was configured differently that will give an error */ + CFIndex size = PyUnicode_GetSize(v); + UniChar *unichars = PyUnicode_AsUnicode(v); + if (!unichars) return 0; + *p_itself = CFStringCreateWithCharacters((CFAllocatorRef)NULL, unichars, size); + return 1; } - + if (!CFStringRefObj_Check(v)) { @@ -2335,10 +2343,10 @@ if( data == NULL ) return PyErr_NoMemory(); if ( CFStringGetCString(_self->ob_itself, data, size, 0) ) { - _res = (PyObject *)PyString_FromString(data); + _res = (PyObject *)PyString_FromString(data); } else { - PyErr_SetString(PyExc_RuntimeError, "CFStringGetCString could not fit the string"); - _res = NULL; + PyErr_SetString(PyExc_RuntimeError, "CFStringGetCString could not fit the string"); + _res = NULL; } free(data); return _res; @@ -2444,21 +2452,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFStringRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFStringRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFStringRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFStringRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFStringRefObj_Convert, &itself)) { - ((CFStringRefObject *)self)->ob_itself = itself; + ((CFStringRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFStringRefObject *)self)->ob_itself = itself; + ((CFStringRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -2466,7 +2474,7 @@ #define CFStringRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFStringRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFStringRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -2552,6 +2560,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFMutableStringRefObj_Convert(PyObject *v, CFMutableStringRef *p_itself) { @@ -2831,21 +2840,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFMutableStringRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFMutableStringRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFMutableStringRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFMutableStringRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFMutableStringRefObj_Convert, &itself)) { - ((CFMutableStringRefObject *)self)->ob_itself = itself; + ((CFMutableStringRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFMutableStringRefObject *)self)->ob_itself = itself; + ((CFMutableStringRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -2853,7 +2862,7 @@ #define CFMutableStringRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFMutableStringRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFMutableStringRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -2939,6 +2948,7 @@ it->ob_freeit = CFRelease; return (PyObject *)it; } + int CFURLRefObj_Convert(PyObject *v, CFURLRef *p_itself) { @@ -3482,21 +3492,21 @@ /* XXXX Or should we use CFHash?? 
*/ return (int)self->ob_itself; } -static int CFURLRefObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int CFURLRefObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { CFURLRef itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFURLRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFURLRefObj_Convert, &itself)) { - ((CFURLRefObject *)self)->ob_itself = itself; + ((CFURLRefObject *)_self)->ob_itself = itself; return 0; } /* Any CFTypeRef descendent is allowed as initializer too */ - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CFTypeRefObj_Convert, &itself)) { - ((CFURLRefObject *)self)->ob_itself = itself; + ((CFURLRefObject *)_self)->ob_itself = itself; return 0; } return -1; @@ -3504,7 +3514,7 @@ #define CFURLRefObj_tp_alloc PyType_GenericAlloc -static PyObject *CFURLRefObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CFURLRefObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; @@ -4678,17 +4688,17 @@ CFTypeID typeid; if (!PyArg_ParseTuple(_args, "O&", PyCF_Python2CF, &rv)) - return NULL; + return NULL; typeid = CFGetTypeID(rv); if (typeid == CFStringGetTypeID()) - return Py_BuildValue("O&", CFStringRefObj_New, rv); + return Py_BuildValue("O&", CFStringRefObj_New, rv); if (typeid == CFArrayGetTypeID()) - return Py_BuildValue("O&", CFArrayRefObj_New, rv); + return Py_BuildValue("O&", CFArrayRefObj_New, rv); if (typeid == CFDictionaryGetTypeID()) - return Py_BuildValue("O&", CFDictionaryRefObj_New, rv); + return Py_BuildValue("O&", CFDictionaryRefObj_New, rv); if (typeid == CFURLGetTypeID()) - return Py_BuildValue("O&", CFURLRefObj_New, rv); + return Py_BuildValue("O&", CFURLRefObj_New, rv); _res = Py_BuildValue("O&", CFTypeRefObj_New, rv); 
return _res; @@ -4817,42 +4827,42 @@ /* Routines to convert any CF type to/from the corresponding CFxxxObj */ PyObject *CFObj_New(CFTypeRef itself) { - if (itself == NULL) - { - PyErr_SetString(PyExc_RuntimeError, "cannot wrap NULL"); - return NULL; - } - if (CFGetTypeID(itself) == CFArrayGetTypeID()) return CFArrayRefObj_New((CFArrayRef)itself); - if (CFGetTypeID(itself) == CFDictionaryGetTypeID()) return CFDictionaryRefObj_New((CFDictionaryRef)itself); - if (CFGetTypeID(itself) == CFDataGetTypeID()) return CFDataRefObj_New((CFDataRef)itself); - if (CFGetTypeID(itself) == CFStringGetTypeID()) return CFStringRefObj_New((CFStringRef)itself); - if (CFGetTypeID(itself) == CFURLGetTypeID()) return CFURLRefObj_New((CFURLRef)itself); - /* XXXX Or should we use PyCF_CF2Python here?? */ - return CFTypeRefObj_New(itself); + if (itself == NULL) + { + PyErr_SetString(PyExc_RuntimeError, "cannot wrap NULL"); + return NULL; + } + if (CFGetTypeID(itself) == CFArrayGetTypeID()) return CFArrayRefObj_New((CFArrayRef)itself); + if (CFGetTypeID(itself) == CFDictionaryGetTypeID()) return CFDictionaryRefObj_New((CFDictionaryRef)itself); + if (CFGetTypeID(itself) == CFDataGetTypeID()) return CFDataRefObj_New((CFDataRef)itself); + if (CFGetTypeID(itself) == CFStringGetTypeID()) return CFStringRefObj_New((CFStringRef)itself); + if (CFGetTypeID(itself) == CFURLGetTypeID()) return CFURLRefObj_New((CFURLRef)itself); + /* XXXX Or should we use PyCF_CF2Python here?? 
*/ + return CFTypeRefObj_New(itself); } int CFObj_Convert(PyObject *v, CFTypeRef *p_itself) { - if (v == Py_None) { *p_itself = NULL; return 1; } - /* Check for other CF objects here */ + if (v == Py_None) { *p_itself = NULL; return 1; } + /* Check for other CF objects here */ - if (!CFTypeRefObj_Check(v) && - !CFArrayRefObj_Check(v) && - !CFMutableArrayRefObj_Check(v) && - !CFDictionaryRefObj_Check(v) && - !CFMutableDictionaryRefObj_Check(v) && - !CFDataRefObj_Check(v) && - !CFMutableDataRefObj_Check(v) && - !CFStringRefObj_Check(v) && - !CFMutableStringRefObj_Check(v) && - !CFURLRefObj_Check(v) ) - { - /* XXXX Or should we use PyCF_Python2CF here?? */ - PyErr_SetString(PyExc_TypeError, "CF object required"); - return 0; - } - *p_itself = ((CFTypeRefObject *)v)->ob_itself; - return 1; + if (!CFTypeRefObj_Check(v) && + !CFArrayRefObj_Check(v) && + !CFMutableArrayRefObj_Check(v) && + !CFDictionaryRefObj_Check(v) && + !CFMutableDictionaryRefObj_Check(v) && + !CFDataRefObj_Check(v) && + !CFMutableDataRefObj_Check(v) && + !CFStringRefObj_Check(v) && + !CFMutableStringRefObj_Check(v) && + !CFURLRefObj_Check(v) ) + { + /* XXXX Or should we use PyCF_Python2CF here?? 
*/ + PyErr_SetString(PyExc_TypeError, "CF object required"); + return 0; + } + *p_itself = ((CFTypeRefObject *)v)->ob_itself; + return 1; } Index: cfsupport.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/cf/cfsupport.py,v retrieving revision 1.18.2.2 retrieving revision 1.18.2.3 diff -u -d -r1.18.2.2 -r1.18.2.3 --- cfsupport.py 7 Jan 2005 07:01:20 -0000 1.18.2.2 +++ cfsupport.py 16 Oct 2005 05:24:01 -0000 1.18.2.3 @@ -315,18 +315,18 @@ Output("%s itself;", self.itselftype) Output("char *kw[] = {\"itself\", 0};") Output() - Output("if (PyArg_ParseTupleAndKeywords(args, kwds, \"O&\", kw, %s_Convert, &itself))", + Output("if (PyArg_ParseTupleAndKeywords(_args, _kwds, \"O&\", kw, %s_Convert, &itself))", self.prefix) OutLbrace() - Output("((%s *)self)->ob_itself = itself;", self.objecttype) + Output("((%s *)_self)->ob_itself = itself;", self.objecttype) Output("return 0;") OutRbrace() if self.prefix != 'CFTypeRefObj': Output() Output("/* Any CFTypeRef descendent is allowed as initializer too */") - Output("if (PyArg_ParseTupleAndKeywords(args, kwds, \"O&\", kw, CFTypeRefObj_Convert, &itself))") + Output("if (PyArg_ParseTupleAndKeywords(_args, _kwds, \"O&\", kw, CFTypeRefObj_Convert, &itself))") OutLbrace() - Output("((%s *)self)->ob_itself = itself;", self.objecttype) + Output("((%s *)_self)->ob_itself = itself;", self.objecttype) Output("return 0;") OutRbrace() Output("return -1;") From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/menu _Menumodule.c, 1.11.2.2, 1.11.2.3 Message-ID: <20051016052435.C187A1E401F@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/menu In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/menu Modified Files: Tag: ast-branch _Menumodule.c Log 
Message: Merge head to branch (for the last time) Index: _Menumodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/menu/_Menumodule.c,v retrieving revision 1.11.2.2 retrieving revision 1.11.2.3 diff -u -d -r1.11.2.2 -r1.11.2.3 --- _Menumodule.c 7 Jan 2005 07:01:28 -0000 1.11.2.2 +++ _Menumodule.c 16 Oct 2005 05:24:02 -0000 1.11.2.3 @@ -24,7 +24,7 @@ extern int _MenuObj_Convert(PyObject *, MenuHandle *); #define MenuObj_New _MenuObj_New -#define MenuObj_Convert _MenuObj_Convert +#define MenuObj_Convert _MenuObj_Convert #endif #define as_Menu(h) ((MenuHandle)h) @@ -34,21 +34,21 @@ /* Alternative version of MenuObj_New, which returns None for NULL argument */ PyObject *OptMenuObj_New(MenuRef itself) { - if (itself == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - return MenuObj_New(itself); + if (itself == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + return MenuObj_New(itself); } /* Alternative version of MenuObj_Convert, which returns NULL for a None argument */ int OptMenuObj_Convert(PyObject *v, MenuRef *p_itself) { - if ( v == Py_None ) { - *p_itself = NULL; - return 1; - } - return MenuObj_Convert(v, p_itself); + if ( v == Py_None ) { + *p_itself = NULL; + return 1; + } + return MenuObj_Convert(v, p_itself); } static PyObject *Menu_Error; @@ -72,6 +72,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int MenuObj_Convert(PyObject *v, MenuHandle *p_itself) { if (!MenuObj_Check(v)) @@ -2536,16 +2537,16 @@ #define MenuObj_tp_alloc PyType_GenericAlloc -static PyObject *MenuObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *MenuObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; MenuHandle itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, MenuObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - 
((MenuObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, MenuObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((MenuObject *)_self)->ob_itself = itself; + return _self; } #define MenuObj_tp_free PyObject_Del @@ -3445,8 +3446,8 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(MenuHandle, MenuObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(MenuHandle, MenuObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(MenuHandle, MenuObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(MenuHandle, MenuObj_Convert); m = Py_InitModule("_Menu", Menu_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/list _Listmodule.c, 1.10.2.2, 1.10.2.3 Message-ID: <20051016052435.D4CFC1E4012@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/list In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/list Modified Files: Tag: ast-branch _Listmodule.c Log Message: Merge head to branch (for the last time) Index: _Listmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/list/_Listmodule.c,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- _Listmodule.c 7 Jan 2005 07:01:28 -0000 1.10.2.2 +++ _Listmodule.c 16 Oct 2005 05:24:02 -0000 1.10.2.3 @@ -50,9 +50,9 @@ { ListObject *it; if (itself == NULL) { - PyErr_SetString(List_Error,"Cannot create null List"); - return NULL; - } + PyErr_SetString(List_Error,"Cannot create null List"); + return NULL; + } it = PyObject_NEW(ListObject, &List_Type); if (it == NULL) return NULL; it->ob_itself = itself; @@ -61,6 +61,7 @@ SetListRefCon(itself, (long)it); return (PyObject *)it; } + int ListObj_Convert(PyObject *v, ListHandle *p_itself) { 
if (!ListObj_Check(v)) @@ -738,16 +739,16 @@ #define ListObj_tp_alloc PyType_GenericAlloc -static PyObject *ListObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *ListObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; ListHandle itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, ListObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((ListObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, ListObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((ListObject *)_self)->ob_itself = itself; + return _self; } #define ListObj_tp_free PyObject_Del @@ -826,10 +827,10 @@ &hasGrow, &scrollHoriz, &scrollVert)) - return NULL; + return NULL; - /* Carbon applications use the CreateCustomList API */ + /* Carbon applications use the CreateCustomList API */ theSpec.u.userProc = myListDefFunctionUPP; CreateCustomList(&rView, &dataBounds, @@ -845,7 +846,7 @@ _res = ListObj_New(outList); if (_res == NULL) - return NULL; + return NULL; Py_INCREF(listDefFunc); ((ListObject*)_res)->ob_ldef_func = listDefFunc; return _res; @@ -1024,7 +1025,7 @@ Handle h; ListObject *l; if (!PyArg_ParseTuple(_args, "O&", ResObj_Convert, &h)) - return NULL; + return NULL; l = (ListObject *)ListObj_New(as_List(h)); l->ob_must_be_disposed = 0; _res = Py_BuildValue("O", l); @@ -1066,34 +1067,34 @@ Cell theCell, SInt16 dataOffset, SInt16 dataLen, - ListHandle theList) + ListHandle theList) { - PyObject *listDefFunc, *args, *rv=NULL; - ListObject *self; - - self = (ListObject*)GetListRefCon(theList); - if (self == NULL || self->ob_itself != theList) - return; /* nothing we can do */ - listDefFunc = self->ob_ldef_func; - if (listDefFunc == NULL) - return; /* nothing we can do */ - args = Py_BuildValue("hbO&O&hhO", message, - selected, - 
PyMac_BuildRect, cellRect, - PyMac_BuildPoint, theCell, - dataOffset, - dataLen, - self); - if (args != NULL) { - rv = PyEval_CallObject(listDefFunc, args); - Py_DECREF(args); - } - if (rv == NULL) { - PySys_WriteStderr("error in list definition callback:\n"); - PyErr_Print(); - } else { - Py_DECREF(rv); - } + PyObject *listDefFunc, *args, *rv=NULL; + ListObject *self; + + self = (ListObject*)GetListRefCon(theList); + if (self == NULL || self->ob_itself != theList) + return; /* nothing we can do */ + listDefFunc = self->ob_ldef_func; + if (listDefFunc == NULL) + return; /* nothing we can do */ + args = Py_BuildValue("hbO&O&hhO", message, + selected, + PyMac_BuildRect, cellRect, + PyMac_BuildPoint, theCell, + dataOffset, + dataLen, + self); + if (args != NULL) { + rv = PyEval_CallObject(listDefFunc, args); + Py_DECREF(args); + } + if (rv == NULL) { + PySys_WriteStderr("error in list definition callback:\n"); + PyErr_Print(); + } else { + Py_DECREF(rv); + } } From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/qdoffs _Qdoffsmodule.c, 1.8.2.2, 1.8.2.3 Message-ID: <20051016052436.259C21E400C@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/qdoffs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/qdoffs Modified Files: Tag: ast-branch _Qdoffsmodule.c Log Message: Merge head to branch (for the last time) Index: _Qdoffsmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/qdoffs/_Qdoffsmodule.c,v retrieving revision 1.8.2.2 retrieving revision 1.8.2.3 diff -u -d -r1.8.2.2 -r1.8.2.3 --- _Qdoffsmodule.c 7 Jan 2005 07:01:40 -0000 1.8.2.2 +++ _Qdoffsmodule.c 16 Oct 2005 05:24:02 -0000 1.8.2.3 @@ -50,6 +50,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int 
GWorldObj_Convert(PyObject *v, GWorldPtr *p_itself) { if (!GWorldObj_Check(v)) @@ -134,16 +135,16 @@ #define GWorldObj_tp_alloc PyType_GenericAlloc -static PyObject *GWorldObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *GWorldObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; GWorldPtr itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, GWorldObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((GWorldObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, GWorldObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((GWorldObject *)_self)->ob_itself = itself; + return _self; } #define GWorldObj_tp_free PyObject_Del @@ -605,7 +606,7 @@ char *cp; if ( !PyArg_ParseTuple(_args, "O&ii", ResObj_Convert, &pm, &from, &length) ) - return NULL; + return NULL; cp = GetPixBaseAddr(pm)+from; _res = PyString_FromStringAndSize(cp, length); return _res; @@ -621,7 +622,7 @@ char *cp, *icp; if ( !PyArg_ParseTuple(_args, "O&is#", ResObj_Convert, &pm, &from, &icp, &length) ) - return NULL; + return NULL; cp = GetPixBaseAddr(pm)+from; memcpy(cp, icp, length); Py_INCREF(Py_None); @@ -690,8 +691,8 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(GWorldPtr, GWorldObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(GWorldPtr, GWorldObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(GWorldPtr, GWorldObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(GWorldPtr, GWorldObj_Convert); m = Py_InitModule("_Qdoffs", Qdoffs_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/ctl _Ctlmodule.c, 1.17.2.2, 1.17.2.3 Message-ID: <20051016052435.EC93B1E4014@bag.python.org> 
Update of /cvsroot/python/python/dist/src/Mac/Modules/ctl In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/ctl Modified Files: Tag: ast-branch _Ctlmodule.c Log Message: Merge head to branch (for the last time) Index: _Ctlmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/ctl/_Ctlmodule.c,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -d -r1.17.2.2 -r1.17.2.3 --- _Ctlmodule.c 7 Jan 2005 07:01:21 -0000 1.17.2.2 +++ _Ctlmodule.c 16 Oct 2005 05:24:01 -0000 1.17.2.3 @@ -40,19 +40,19 @@ ControlFontStyle_New(ControlFontStyleRec *itself) { - return Py_BuildValue("hhhhhhO&O&", itself->flags, itself->font, - itself->size, itself->style, itself->mode, itself->just, - QdRGB_New, &itself->foreColor, QdRGB_New, &itself->backColor); + return Py_BuildValue("hhhhhhO&O&", itself->flags, itself->font, + itself->size, itself->style, itself->mode, itself->just, + QdRGB_New, &itself->foreColor, QdRGB_New, &itself->backColor); } #endif static int ControlFontStyle_Convert(PyObject *v, ControlFontStyleRec *itself) { - return PyArg_Parse(v, "(hhhhhhO&O&)", &itself->flags, - &itself->font, &itself->size, &itself->style, &itself->mode, - &itself->just, QdRGB_Convert, &itself->foreColor, - QdRGB_Convert, &itself->backColor); + return PyArg_Parse(v, "(hhhhhhO&O&)", &itself->flags, + &itself->font, &itself->size, &itself->style, &itself->mode, + &itself->just, QdRGB_Convert, &itself->foreColor, + QdRGB_Convert, &itself->backColor); } /* @@ -62,13 +62,13 @@ PyControlID_New(ControlID *itself) { - return Py_BuildValue("O&l", PyMac_BuildOSType, itself->signature, itself->id); + return Py_BuildValue("O&l", PyMac_BuildOSType, itself->signature, itself->id); } static int PyControlID_Convert(PyObject *v, ControlID *itself) { - return PyArg_Parse(v, "(O&l)", PyMac_GetOSType, &itself->signature, &itself->id); + return PyArg_Parse(v, "(O&l)", PyMac_GetOSType, &itself->signature, 
&itself->id); } /* @@ -77,40 +77,40 @@ static int DataBrowserTableViewColumnDesc_Convert(PyObject *v, DataBrowserTableViewColumnDesc *itself) { - return PyArg_Parse(v, "(lO&l)", - &itself->propertyID, - PyMac_GetOSType, &itself->propertyType, - &itself->propertyFlags); + return PyArg_Parse(v, "(lO&l)", + &itself->propertyID, + PyMac_GetOSType, &itself->propertyType, + &itself->propertyFlags); } static int ControlButtonContentInfo_Convert(PyObject *v, ControlButtonContentInfo *itself) { - return PyArg_Parse(v, "(hO&)", - &itself->contentType, - OptResObj_Convert, &itself->u.iconSuite); + return PyArg_Parse(v, "(hO&)", + &itself->contentType, + OptResObj_Convert, &itself->u.iconSuite); } static int DataBrowserListViewHeaderDesc_Convert(PyObject *v, DataBrowserListViewHeaderDesc *itself) { - itself->version = kDataBrowserListViewLatestHeaderDesc; - return PyArg_Parse(v, "(HHhO&HO&O&)", - &itself->minimumWidth, - &itself->maximumWidth, - &itself->titleOffset, - CFStringRefObj_Convert, &itself->titleString, - &itself->initialOrder, - ControlFontStyle_Convert, &itself->btnFontStyle, - ControlButtonContentInfo_Convert, &itself->btnContentInfo); + itself->version = kDataBrowserListViewLatestHeaderDesc; + return PyArg_Parse(v, "(HHhO&HO&O&)", + &itself->minimumWidth, + &itself->maximumWidth, + &itself->titleOffset, + CFStringRefObj_Convert, &itself->titleString, + &itself->initialOrder, + ControlFontStyle_Convert, &itself->btnFontStyle, + ControlButtonContentInfo_Convert, &itself->btnContentInfo); } static int DataBrowserListViewColumnDesc_Convert(PyObject *v, DataBrowserListViewColumnDesc *itself) { - return PyArg_Parse(v, "(O&O&)", - DataBrowserTableViewColumnDesc_Convert, &itself->propertyDesc, - DataBrowserListViewHeaderDesc_Convert, &itself->headerBtnDesc); + return PyArg_Parse(v, "(O&O&)", + DataBrowserTableViewColumnDesc_Convert, &itself->propertyDesc, + DataBrowserListViewHeaderDesc_Convert, &itself->headerBtnDesc); } /* TrackControl and HandleControlClick callback 
support */ @@ -125,8 +125,8 @@ static ControlUserPaneHitTestUPP myhittestproc_upp; static ControlUserPaneTrackingUPP mytrackingproc_upp; -static int settrackfunc(PyObject *); /* forward */ -static void clrtrackfunc(void); /* forward */ +static int settrackfunc(PyObject *); /* forward */ +static void clrtrackfunc(void); /* forward */ static int setcallback(PyObject *, OSType, PyObject *, UniversalProcPtr *); static PyObject *Ctl_Error; @@ -154,6 +154,7 @@ it->ob_callbackdict = NULL; return (PyObject *)it; } + int CtlObj_Convert(PyObject *v, ControlHandle *p_itself) { if (!CtlObj_Check(v)) @@ -3223,16 +3224,16 @@ { PyObject *_res = NULL; - if (!PyArg_ParseTuple(_args, "")) - return NULL; - if ( _self->ob_itself ) { - SetControlReference(_self->ob_itself, (long)0); /* Make it forget about us */ - DisposeControl(_self->ob_itself); - _self->ob_itself = NULL; - } - Py_INCREF(Py_None); - _res = Py_None; - return _res; + if (!PyArg_ParseTuple(_args, "")) + return NULL; + if ( _self->ob_itself ) { + SetControlReference(_self->ob_itself, (long)0); /* Make it forget about us */ + DisposeControl(_self->ob_itself); + _self->ob_itself = NULL; + } + Py_INCREF(Py_None); + _res = Py_None; + return _res; } @@ -3247,14 +3248,14 @@ if (!PyArg_ParseTuple(_args, "O&|O", PyMac_GetPoint, &startPoint, &callback)) - return NULL; + return NULL; if (callback && callback != Py_None) { - if (PyInt_Check(callback) && PyInt_AS_LONG(callback) == -1) - upp = (ControlActionUPP)-1; - else { - settrackfunc(callback); - upp = mytracker_upp; - } + if (PyInt_Check(callback) && PyInt_AS_LONG(callback) == -1) + upp = (ControlActionUPP)-1; + else { + settrackfunc(callback); + upp = mytracker_upp; + } } _rv = TrackControl(_self->ob_itself, startPoint, @@ -3280,14 +3281,14 @@ PyMac_GetPoint, &startPoint, &modifiers, &callback)) - return NULL; + return NULL; if (callback && callback != Py_None) { - if (PyInt_Check(callback) && PyInt_AS_LONG(callback) == -1) - upp = (ControlActionUPP)-1; - else { - 
settrackfunc(callback); - upp = mytracker_upp; - } + if (PyInt_Check(callback) && PyInt_AS_LONG(callback) == -1) + upp = (ControlActionUPP)-1; + else { + settrackfunc(callback); + upp = mytracker_upp; + } } _rv = HandleControlClick(_self->ob_itself, startPoint, @@ -3314,16 +3315,16 @@ &inPart, PyMac_GetOSType, &inTagName, &buffer, &bufferSize)) - return NULL; + return NULL; _err = SetControlData(_self->ob_itself, - inPart, - inTagName, - bufferSize, + inPart, + inTagName, + bufferSize, buffer); if (_err != noErr) - return PyMac_Error(_err); + return PyMac_Error(_err); _res = Py_None; return _res; @@ -3343,29 +3344,29 @@ if (!PyArg_ParseTuple(_args, "hO&", &inPart, PyMac_GetOSType, &inTagName)) - return NULL; + return NULL; /* allocate a buffer for the data */ _err = GetControlDataSize(_self->ob_itself, - inPart, - inTagName, + inPart, + inTagName, &bufferSize); if (_err != noErr) - return PyMac_Error(_err); + return PyMac_Error(_err); buffer = PyMem_NEW(char, bufferSize); if (buffer == NULL) - return PyErr_NoMemory(); + return PyErr_NoMemory(); _err = GetControlData(_self->ob_itself, - inPart, - inTagName, - bufferSize, + inPart, + inTagName, + bufferSize, buffer, &outSize); if (_err != noErr) { - PyMem_DEL(buffer); - return PyMac_Error(_err); + PyMem_DEL(buffer); + return PyMac_Error(_err); } _res = Py_BuildValue("s#", buffer, outSize); PyMem_DEL(buffer); @@ -3386,16 +3387,16 @@ &inPart, PyMac_GetOSType, &inTagName, OptResObj_Convert, &buffer)) - return NULL; + return NULL; _err = SetControlData(_self->ob_itself, - inPart, - inTagName, - sizeof(buffer), + inPart, + inTagName, + sizeof(buffer), (Ptr)&buffer); if (_err != noErr) - return PyMac_Error(_err); + return PyMac_Error(_err); _res = Py_None; return _res; @@ -3414,29 +3415,29 @@ if (!PyArg_ParseTuple(_args, "hO&", &inPart, PyMac_GetOSType, &inTagName)) - return NULL; + return NULL; /* Check it is handle-sized */ _err = GetControlDataSize(_self->ob_itself, - inPart, - inTagName, + inPart, + inTagName, 
&bufferSize); if (_err != noErr) - return PyMac_Error(_err); + return PyMac_Error(_err); if (bufferSize != sizeof(Handle)) { - PyErr_SetString(Ctl_Error, "GetControlDataSize() != sizeof(Handle)"); - return NULL; + PyErr_SetString(Ctl_Error, "GetControlDataSize() != sizeof(Handle)"); + return NULL; } _err = GetControlData(_self->ob_itself, - inPart, - inTagName, - sizeof(Handle), + inPart, + inTagName, + sizeof(Handle), (Ptr)&hdl, &bufferSize); if (_err != noErr) { - return PyMac_Error(_err); + return PyMac_Error(_err); } _res = Py_BuildValue("O&", OptResObj_New, hdl); return _res; @@ -3457,18 +3458,18 @@ &inPart, PyMac_GetOSType, &inTagName, &callback)) - return NULL; + return NULL; if ( setcallback((PyObject *)_self, inTagName, callback, &c_callback) < 0 ) - return NULL; + return NULL; _err = SetControlData(_self->ob_itself, - inPart, - inTagName, - sizeof(c_callback), + inPart, + inTagName, + sizeof(c_callback), (Ptr)&c_callback); if (_err != noErr) - return PyMac_Error(_err); + return PyMac_Error(_err); _res = Py_None; return _res; @@ -3848,16 +3849,16 @@ #define CtlObj_tp_alloc PyType_GenericAlloc -static PyObject *CtlObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *CtlObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; ControlHandle itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, CtlObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((ControlObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, CtlObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((ControlObject *)_self)->ob_itself = itself; + return _self; } #define CtlObj_tp_free PyObject_Del @@ -5379,26 +5380,26 @@ &size, &direction, &tabArrayObj)) - return NULL; + return NULL; i = PySequence_Length(tabArrayObj); if (i == -1) - 
return NULL; + return NULL; if (i > MAXTABS) { - PyErr_SetString(Ctl_Error, "Too many tabs"); - return NULL; + PyErr_SetString(Ctl_Error, "Too many tabs"); + return NULL; } numTabs = i; for (i=0; iob_itself = itself; - it->ob_callbackdict = NULL; - return (PyObject *)it; + ControlObject *it; + if (itself == NULL) return PyMac_Error(resNotFound); + it = PyObject_NEW(ControlObject, &Control_Type); + if (it == NULL) return NULL; + it->ob_itself = itself; + it->ob_callbackdict = NULL; + return (PyObject *)it; } static PyObject * CtlObj_WhichControl(ControlHandle c) { - PyObject *it; + PyObject *it; - if (c == NULL) - it = Py_None; - else { - it = (PyObject *) GetControlReference(c); - /* - ** If the refcon is zero or doesn't point back to the Python object - ** the control is not ours. Return a temporary object. - */ - if (it == NULL || ((ControlObject *)it)->ob_itself != c) - return CtlObj_NewUnmanaged(c); - } - Py_INCREF(it); - return it; + if (c == NULL) + it = Py_None; + else { + it = (PyObject *) GetControlReference(c); + /* + ** If the refcon is zero or doesn't point back to the Python object + ** the control is not ours. Return a temporary object. 
+ */ + if (it == NULL || ((ControlObject *)it)->ob_itself != c) + return CtlObj_NewUnmanaged(c); + } + Py_INCREF(it); + return it; } static int settrackfunc(PyObject *obj) { - if (tracker) { - PyErr_SetString(Ctl_Error, "Tracker function in use"); - return 0; - } - tracker = obj; - Py_INCREF(tracker); - return 1; + if (tracker) { + PyErr_SetString(Ctl_Error, "Tracker function in use"); + return 0; + } + tracker = obj; + Py_INCREF(tracker); + return 1; } static void clrtrackfunc(void) { - Py_XDECREF(tracker); - tracker = 0; + Py_XDECREF(tracker); + tracker = 0; } static pascal void mytracker(ControlHandle ctl, short part) { - PyObject *args, *rv=0; + PyObject *args, *rv=0; - args = Py_BuildValue("(O&i)", CtlObj_WhichControl, ctl, (int)part); - if (args && tracker) { - rv = PyEval_CallObject(tracker, args); - Py_DECREF(args); - } - if (rv) - Py_DECREF(rv); - else { - PySys_WriteStderr("TrackControl or HandleControlClick: exception in tracker function\n"); - PyErr_Print(); - } + args = Py_BuildValue("(O&i)", CtlObj_WhichControl, ctl, (int)part); + if (args && tracker) { + rv = PyEval_CallObject(tracker, args); + Py_DECREF(args); + } + if (rv) + Py_DECREF(rv); + else { + PySys_WriteStderr("TrackControl or HandleControlClick: exception in tracker function\n"); + PyErr_Print(); + } } static int setcallback(PyObject *myself, OSType which, PyObject *callback, UniversalProcPtr *uppp) { - ControlObject *self = (ControlObject *)myself; - char keybuf[9]; - - if ( which == kMyControlActionProcTag ) - *uppp = (UniversalProcPtr)myactionproc_upp; - else if ( which == kControlUserPaneKeyDownProcTag ) - *uppp = (UniversalProcPtr)mykeydownproc_upp; - else if ( which == kControlUserPaneFocusProcTag ) - *uppp = (UniversalProcPtr)myfocusproc_upp; - else if ( which == kControlUserPaneDrawProcTag ) - *uppp = (UniversalProcPtr)mydrawproc_upp; - else if ( which == kControlUserPaneIdleProcTag ) - *uppp = (UniversalProcPtr)myidleproc_upp; - else if ( which == kControlUserPaneHitTestProcTag ) 
- *uppp = (UniversalProcPtr)myhittestproc_upp; - else if ( which == kControlUserPaneTrackingProcTag ) - *uppp = (UniversalProcPtr)mytrackingproc_upp; - else - return -1; - /* Only now do we test for clearing of the callback: */ - if ( callback == Py_None ) - *uppp = NULL; - /* Create the dict if it doesn't exist yet (so we don't get such a dict for every control) */ - if ( self->ob_callbackdict == NULL ) - if ( (self->ob_callbackdict = PyDict_New()) == NULL ) - return -1; - /* And store the Python callback */ - sprintf(keybuf, "%x", (unsigned)which); - if (PyDict_SetItemString(self->ob_callbackdict, keybuf, callback) < 0) - return -1; - return 0; + ControlObject *self = (ControlObject *)myself; + char keybuf[9]; + + if ( which == kMyControlActionProcTag ) + *uppp = (UniversalProcPtr)myactionproc_upp; + else if ( which == kControlUserPaneKeyDownProcTag ) + *uppp = (UniversalProcPtr)mykeydownproc_upp; + else if ( which == kControlUserPaneFocusProcTag ) + *uppp = (UniversalProcPtr)myfocusproc_upp; + else if ( which == kControlUserPaneDrawProcTag ) + *uppp = (UniversalProcPtr)mydrawproc_upp; + else if ( which == kControlUserPaneIdleProcTag ) + *uppp = (UniversalProcPtr)myidleproc_upp; + else if ( which == kControlUserPaneHitTestProcTag ) + *uppp = (UniversalProcPtr)myhittestproc_upp; + else if ( which == kControlUserPaneTrackingProcTag ) + *uppp = (UniversalProcPtr)mytrackingproc_upp; + else + return -1; + /* Only now do we test for clearing of the callback: */ + if ( callback == Py_None ) + *uppp = NULL; + /* Create the dict if it doesn't exist yet (so we don't get such a dict for every control) */ + if ( self->ob_callbackdict == NULL ) + if ( (self->ob_callbackdict = PyDict_New()) == NULL ) + return -1; + /* And store the Python callback */ + sprintf(keybuf, "%x", (unsigned)which); + if (PyDict_SetItemString(self->ob_callbackdict, keybuf, callback) < 0) + return -1; + return 0; } static PyObject * callcallback(ControlObject *self, OSType which, PyObject *arglist) { - 
char keybuf[9]; - PyObject *func, *rv; - - sprintf(keybuf, "%x", (unsigned)which); - if ( self->ob_callbackdict == NULL || - (func = PyDict_GetItemString(self->ob_callbackdict, keybuf)) == NULL ) { - PySys_WriteStderr("Control callback %x without callback object\n", (unsigned)which); - return NULL; - } - rv = PyEval_CallObject(func, arglist); - if ( rv == NULL ) { - PySys_WriteStderr("Exception in control callback %x handler\n", (unsigned)which); - PyErr_Print(); - } - return rv; + char keybuf[9]; + PyObject *func, *rv; + + sprintf(keybuf, "%x", (unsigned)which); + if ( self->ob_callbackdict == NULL || + (func = PyDict_GetItemString(self->ob_callbackdict, keybuf)) == NULL ) { + PySys_WriteStderr("Control callback %x without callback object\n", (unsigned)which); + return NULL; + } + rv = PyEval_CallObject(func, arglist); + if ( rv == NULL ) { + PySys_WriteStderr("Exception in control callback %x handler\n", (unsigned)which); + PyErr_Print(); + } + return rv; } static pascal void myactionproc(ControlHandle control, SInt16 part) { - ControlObject *ctl_obj; - PyObject *arglist, *rv; - - ctl_obj = (ControlObject *)CtlObj_WhichControl(control); - arglist = Py_BuildValue("Oh", ctl_obj, part); - rv = callcallback(ctl_obj, kMyControlActionProcTag, arglist); - Py_XDECREF(arglist); - Py_XDECREF(rv); + ControlObject *ctl_obj; + PyObject *arglist, *rv; + + ctl_obj = (ControlObject *)CtlObj_WhichControl(control); + arglist = Py_BuildValue("Oh", ctl_obj, part); + rv = callcallback(ctl_obj, kMyControlActionProcTag, arglist); + Py_XDECREF(arglist); + Py_XDECREF(rv); } static pascal ControlPartCode mykeydownproc(ControlHandle control, SInt16 keyCode, SInt16 charCode, SInt16 modifiers) { - ControlObject *ctl_obj; - PyObject *arglist, *rv; - short c_rv = 0; - - ctl_obj = (ControlObject *)CtlObj_WhichControl(control); - arglist = Py_BuildValue("Ohhh", ctl_obj, keyCode, charCode, modifiers); - rv = callcallback(ctl_obj, kControlUserPaneKeyDownProcTag, arglist); - Py_XDECREF(arglist); - 
if ( rv ) - if (!PyArg_Parse(rv, "h", &c_rv)) - PyErr_Clear(); - Py_XDECREF(rv); - return (ControlPartCode)c_rv; + ControlObject *ctl_obj; + PyObject *arglist, *rv; + short c_rv = 0; + + ctl_obj = (ControlObject *)CtlObj_WhichControl(control); + arglist = Py_BuildValue("Ohhh", ctl_obj, keyCode, charCode, modifiers); + rv = callcallback(ctl_obj, kControlUserPaneKeyDownProcTag, arglist); + Py_XDECREF(arglist); + if ( rv ) + if (!PyArg_Parse(rv, "h", &c_rv)) + PyErr_Clear(); + Py_XDECREF(rv); + return (ControlPartCode)c_rv; } static pascal ControlPartCode myfocusproc(ControlHandle control, ControlPartCode part) { - ControlObject *ctl_obj; - PyObject *arglist, *rv; - short c_rv = kControlFocusNoPart; - - ctl_obj = (ControlObject *)CtlObj_WhichControl(control); - arglist = Py_BuildValue("Oh", ctl_obj, part); - rv = callcallback(ctl_obj, kControlUserPaneFocusProcTag, arglist); - Py_XDECREF(arglist); - if ( rv ) - if (!PyArg_Parse(rv, "h", &c_rv)) - PyErr_Clear(); - Py_XDECREF(rv); - return (ControlPartCode)c_rv; + ControlObject *ctl_obj; + PyObject *arglist, *rv; + short c_rv = kControlFocusNoPart; + + ctl_obj = (ControlObject *)CtlObj_WhichControl(control); + arglist = Py_BuildValue("Oh", ctl_obj, part); + rv = callcallback(ctl_obj, kControlUserPaneFocusProcTag, arglist); + Py_XDECREF(arglist); + if ( rv ) + if (!PyArg_Parse(rv, "h", &c_rv)) + PyErr_Clear(); + Py_XDECREF(rv); + return (ControlPartCode)c_rv; } static pascal void mydrawproc(ControlHandle control, SInt16 part) { - ControlObject *ctl_obj; - PyObject *arglist, *rv; - - ctl_obj = (ControlObject *)CtlObj_WhichControl(control); - arglist = Py_BuildValue("Oh", ctl_obj, part); - rv = callcallback(ctl_obj, kControlUserPaneDrawProcTag, arglist); - Py_XDECREF(arglist); - Py_XDECREF(rv); + ControlObject *ctl_obj; + PyObject *arglist, *rv; + + ctl_obj = (ControlObject *)CtlObj_WhichControl(control); + arglist = Py_BuildValue("Oh", ctl_obj, part); + rv = callcallback(ctl_obj, kControlUserPaneDrawProcTag, arglist); + 
Py_XDECREF(arglist); + Py_XDECREF(rv); } static pascal void myidleproc(ControlHandle control) { - ControlObject *ctl_obj; - PyObject *arglist, *rv; - - ctl_obj = (ControlObject *)CtlObj_WhichControl(control); - arglist = Py_BuildValue("O", ctl_obj); - rv = callcallback(ctl_obj, kControlUserPaneIdleProcTag, arglist); - Py_XDECREF(arglist); - Py_XDECREF(rv); + ControlObject *ctl_obj; + PyObject *arglist, *rv; + + ctl_obj = (ControlObject *)CtlObj_WhichControl(control); + arglist = Py_BuildValue("O", ctl_obj); + rv = callcallback(ctl_obj, kControlUserPaneIdleProcTag, arglist); + Py_XDECREF(arglist); + Py_XDECREF(rv); } static pascal ControlPartCode myhittestproc(ControlHandle control, Point where) { - ControlObject *ctl_obj; - PyObject *arglist, *rv; - short c_rv = -1; + ControlObject *ctl_obj; + PyObject *arglist, *rv; + short c_rv = -1; - ctl_obj = (ControlObject *)CtlObj_WhichControl(control); - arglist = Py_BuildValue("OO&", ctl_obj, PyMac_BuildPoint, where); - rv = callcallback(ctl_obj, kControlUserPaneHitTestProcTag, arglist); - Py_XDECREF(arglist); - /* Ignore errors, nothing we can do about them */ - if ( rv ) - if (!PyArg_Parse(rv, "h", &c_rv)) - PyErr_Clear(); - Py_XDECREF(rv); - return (ControlPartCode)c_rv; + ctl_obj = (ControlObject *)CtlObj_WhichControl(control); + arglist = Py_BuildValue("OO&", ctl_obj, PyMac_BuildPoint, where); + rv = callcallback(ctl_obj, kControlUserPaneHitTestProcTag, arglist); + Py_XDECREF(arglist); + /* Ignore errors, nothing we can do about them */ + if ( rv ) + if (!PyArg_Parse(rv, "h", &c_rv)) + PyErr_Clear(); + Py_XDECREF(rv); + return (ControlPartCode)c_rv; } static pascal ControlPartCode mytrackingproc(ControlHandle control, Point startPt, ControlActionUPP actionProc) { - ControlObject *ctl_obj; - PyObject *arglist, *rv; - short c_rv = -1; + ControlObject *ctl_obj; + PyObject *arglist, *rv; + short c_rv = -1; - ctl_obj = (ControlObject *)CtlObj_WhichControl(control); - /* We cannot pass the actionProc without lots of work */ 
- arglist = Py_BuildValue("OO&", ctl_obj, PyMac_BuildPoint, startPt); - rv = callcallback(ctl_obj, kControlUserPaneTrackingProcTag, arglist); - Py_XDECREF(arglist); - if ( rv ) - if (!PyArg_Parse(rv, "h", &c_rv)) - PyErr_Clear(); - Py_XDECREF(rv); - return (ControlPartCode)c_rv; + ctl_obj = (ControlObject *)CtlObj_WhichControl(control); + /* We cannot pass the actionProc without lots of work */ + arglist = Py_BuildValue("OO&", ctl_obj, PyMac_BuildPoint, startPt); + rv = callcallback(ctl_obj, kControlUserPaneTrackingProcTag, arglist); + Py_XDECREF(arglist); + if ( rv ) + if (!PyArg_Parse(rv, "h", &c_rv)) + PyErr_Clear(); + Py_XDECREF(rv); + return (ControlPartCode)c_rv; } From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/qd _Qdmodule.c, 1.9.2.2, 1.9.2.3 Message-ID: <20051016052436.7FECA1E400E@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/qd In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/qd Modified Files: Tag: ast-branch _Qdmodule.c Log Message: Merge head to branch (for the last time) Index: _Qdmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/qd/_Qdmodule.c,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- _Qdmodule.c 7 Jan 2005 07:01:38 -0000 1.9.2.2 +++ _Qdmodule.c 16 Oct 2005 05:24:02 -0000 1.9.2.3 @@ -41,19 +41,19 @@ PyObject *QdRGB_New(RGBColorPtr itself) { - return Py_BuildValue("lll", (long)itself->red, (long)itself->green, (long)itself->blue); + return Py_BuildValue("lll", (long)itself->red, (long)itself->green, (long)itself->blue); } int QdRGB_Convert(PyObject *v, RGBColorPtr p_itself) { - long red, green, blue; - - if( !PyArg_ParseTuple(v, "lll", &red, &green, &blue) ) - return 0; - p_itself->red = 
(unsigned short)red; - p_itself->green = (unsigned short)green; - p_itself->blue = (unsigned short)blue; - return 1; + long red, green, blue; + + if( !PyArg_ParseTuple(v, "lll", &red, &green, &blue) ) + return 0; + p_itself->red = (unsigned short)red; + p_itself->green = (unsigned short)green; + p_itself->blue = (unsigned short)blue; + return 1; } /* @@ -63,8 +63,8 @@ PyObject *QdFI_New(FontInfo *itself) { - return Py_BuildValue("hhhh", itself->ascent, itself->descent, - itself->widMax, itself->leading); + return Py_BuildValue("hhhh", itself->ascent, itself->descent, + itself->widMax, itself->leading); } static PyObject *Qd_Error; @@ -89,6 +89,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int GrafObj_Convert(PyObject *v, GrafPtr *p_itself) { #if 1 @@ -1310,8 +1311,8 @@ static PyObject *GrafObj_get_visRgn(GrafPortObject *self, void *closure) { RgnHandle h=NewRgn(); /* XXXX wrong dispose routine */ - return Py_BuildValue("O&", ResObj_New, (Handle)GetPortVisibleRegion(self->ob_itself, h)); - + return Py_BuildValue("O&", ResObj_New, (Handle)GetPortVisibleRegion(self->ob_itself, h)); + } #define GrafObj_set_visRgn NULL @@ -1319,8 +1320,8 @@ static PyObject *GrafObj_get_clipRgn(GrafPortObject *self, void *closure) { RgnHandle h=NewRgn(); /* XXXX wrong dispose routine */ - return Py_BuildValue("O&", ResObj_New, (Handle)GetPortClipRegion(self->ob_itself, h)); - + return Py_BuildValue("O&", ResObj_New, (Handle)GetPortClipRegion(self->ob_itself, h)); + } #define GrafObj_set_clipRgn NULL @@ -1341,16 +1342,16 @@ #define GrafObj_tp_alloc PyType_GenericAlloc -static PyObject *GrafObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *GrafObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; GrafPtr itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, GrafObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - 
((GrafPortObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, GrafObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((GrafPortObject *)_self)->ob_itself = itself; + return _self; } #define GrafObj_tp_free PyObject_Del @@ -1427,6 +1428,7 @@ it->referred_bitmap = NULL; return (PyObject *)it; } + int BMObj_Convert(PyObject *v, BitMapPtr *p_itself) { if (!BMObj_Check(v)) @@ -1453,7 +1455,7 @@ char *cp; if ( !PyArg_ParseTuple(_args, "ii", &from, &length) ) - return NULL; + return NULL; cp = _self->ob_itself->baseAddr+from; _res = PyString_FromStringAndSize(cp, length); return _res; @@ -1468,7 +1470,7 @@ char *cp, *icp; if ( !PyArg_ParseTuple(_args, "is#", &from, &icp, &length) ) - return NULL; + return NULL; cp = _self->ob_itself->baseAddr+from; memcpy(cp, icp, length); Py_INCREF(Py_None); @@ -1539,16 +1541,16 @@ #define BMObj_tp_alloc PyType_GenericAlloc -static PyObject *BMObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *BMObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; BitMapPtr itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, BMObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((BitMapObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, BMObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((BitMapObject *)_self)->ob_itself = itself; + return _self; } #define BMObj_tp_free PyObject_Del @@ -5822,6 +5824,7 @@ { PyObject *_res = NULL; char *textBuf__in__; + int textBuf__len__; int textBuf__in_len__; short firstByte; short byteCount; @@ -5882,6 +5885,7 @@ PyObject *_res = NULL; short _rv; char *textBuf__in__; + int textBuf__len__; int textBuf__in_len__; short firstByte; short 
byteCount; @@ -6467,6 +6471,7 @@ { PyObject *_res = NULL; char *textBuf__in__; + int textBuf__len__; int textBuf__in_len__; short firstByte; short byteCount; @@ -6499,17 +6504,17 @@ char *data; if ( !PyArg_ParseTuple(_args, "O!iO&", &PyString_Type, &source, &rowbytes, PyMac_GetRect, - &bounds) ) - return NULL; + &bounds) ) + return NULL; data = PyString_AsString(source); if ((ptr=(BitMap *)malloc(sizeof(BitMap))) == NULL ) - return PyErr_NoMemory(); + return PyErr_NoMemory(); ptr->baseAddr = (Ptr)data; ptr->rowBytes = rowbytes; ptr->bounds = bounds; if ( (_res = BMObj_New(ptr)) == NULL ) { - free(ptr); - return NULL; + free(ptr); + return NULL; } ((BitMapObject *)_res)->referred_object = source; Py_INCREF(source); @@ -6526,16 +6531,16 @@ PyObject *source; if ( !PyArg_ParseTuple(_args, "O!", &PyString_Type, &source) ) - return NULL; + return NULL; if ( PyString_Size(source) != sizeof(BitMap) && PyString_Size(source) != sizeof(PixMap) ) { - PyErr_Format(PyExc_TypeError, - "Argument size was %d, should be %d (sizeof BitMap) or %d (sizeof PixMap)", - PyString_Size(source), sizeof(BitMap), sizeof(PixMap)); - return NULL; + PyErr_Format(PyExc_TypeError, + "Argument size was %d, should be %d (sizeof BitMap) or %d (sizeof PixMap)", + PyString_Size(source), sizeof(BitMap), sizeof(PixMap)); + return NULL; } ptr = (BitMapPtr)PyString_AsString(source); if ( (_res = BMObj_New(ptr)) == NULL ) { - return NULL; + return NULL; } ((BitMapObject *)_res)->referred_object = source; Py_INCREF(source); @@ -7088,15 +7093,15 @@ */ PyObject *BMObj_NewCopied(BitMapPtr itself) { - BitMapObject *it; - BitMapPtr itself_copy; - - if ((itself_copy=(BitMapPtr)malloc(sizeof(BitMap))) == NULL) - return PyErr_NoMemory(); - *itself_copy = *itself; - it = (BitMapObject *)BMObj_New(itself_copy); - it->referred_bitmap = itself_copy; - return (PyObject *)it; + BitMapObject *it; + BitMapPtr itself_copy; + + if ((itself_copy=(BitMapPtr)malloc(sizeof(BitMap))) == NULL) + return PyErr_NoMemory(); + 
*itself_copy = *itself; + it = (BitMapObject *)BMObj_New(itself_copy); + it->referred_bitmap = itself_copy; + return (PyObject *)it; } @@ -7108,12 +7113,12 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(BitMapPtr, BMObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(BitMapPtr, BMObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(GrafPtr, GrafObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(GrafPtr, GrafObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(RGBColorPtr, QdRGB_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(RGBColor, QdRGB_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(BitMapPtr, BMObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(BitMapPtr, BMObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(GrafPtr, GrafObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(GrafPtr, GrafObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(RGBColorPtr, QdRGB_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(RGBColor, QdRGB_Convert); m = Py_InitModule("_Qd", Qd_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/snd _Sndmodule.c, 1.8.2.2, 1.8.2.3 Message-ID: <20051016052436.8A0B61E4016@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/snd In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/snd Modified Files: Tag: ast-branch _Sndmodule.c Log Message: Merge head to branch (for the last time) Index: _Sndmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/snd/_Sndmodule.c,v retrieving revision 1.8.2.2 retrieving revision 1.8.2.3 diff -u -d -r1.8.2.2 -r1.8.2.3 --- _Sndmodule.c 7 Jan 2005 07:01:46 -0000 1.8.2.2 +++ _Sndmodule.c 16 Oct 2005 05:24:03 -0000 1.8.2.3 @@ -21,16 +21,16 @@ static int SndCmd_Convert(PyObject *v, SndCommand *pc) { - int len; - pc->param1 = 0; - pc->param2 = 0; - if (PyTuple_Check(v)) { - if (PyArg_ParseTuple(v, "h|hl", 
&pc->cmd, &pc->param1, &pc->param2)) - return 1; - PyErr_Clear(); - return PyArg_ParseTuple(v, "Hhs#", &pc->cmd, &pc->param1, &pc->param2, &len); - } - return PyArg_Parse(v, "H", &pc->cmd); + int len; + pc->param1 = 0; + pc->param2 = 0; + if (PyTuple_Check(v)) { + if (PyArg_ParseTuple(v, "h|hl", &pc->cmd, &pc->param1, &pc->param2)) + return 1; + PyErr_Clear(); + return PyArg_ParseTuple(v, "Hhs#", &pc->cmd, &pc->param1, &pc->param2, &len); + } + return PyArg_Parse(v, "H", &pc->cmd); } static pascal void SndCh_UserRoutine(SndChannelPtr chan, SndCommand *cmd); /* Forward */ @@ -352,9 +352,9 @@ static int SPBObj_set_completionRoutine(SPBObject *self, PyObject *v, void *closure) { self->ob_spb.completionRoutine = NewSICompletionUPP(SPB_completion); - self->ob_completion = v; - Py_INCREF(v); - return 0; + self->ob_completion = v; + Py_INCREF(v); + return 0; return 0; } @@ -1056,62 +1056,62 @@ static int SndCh_CallCallBack(void *arg) { - SndChannelObject *p = (SndChannelObject *)arg; - PyObject *args; - PyObject *res; - args = Py_BuildValue("(O(hhl))", - p, p->ob_cmd.cmd, p->ob_cmd.param1, p->ob_cmd.param2); - res = PyEval_CallObject(p->ob_callback, args); - Py_DECREF(args); - if (res == NULL) - return -1; - Py_DECREF(res); - return 0; + SndChannelObject *p = (SndChannelObject *)arg; + PyObject *args; + PyObject *res; + args = Py_BuildValue("(O(hhl))", + p, p->ob_cmd.cmd, p->ob_cmd.param1, p->ob_cmd.param2); + res = PyEval_CallObject(p->ob_callback, args); + Py_DECREF(args); + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; } /* Routine passed to NewSndChannel -- schedule a call to SndCh_CallCallBack */ static pascal void SndCh_UserRoutine(SndChannelPtr chan, SndCommand *cmd) { - SndChannelObject *p = (SndChannelObject *)(chan->userInfo); - if (p->ob_callback != NULL) { - long A5 = SetA5(p->ob_A5); - p->ob_cmd = *cmd; - Py_AddPendingCall(SndCh_CallCallBack, (void *)p); - SetA5(A5); - } + SndChannelObject *p = (SndChannelObject *)(chan->userInfo); + if 
(p->ob_callback != NULL) { + long A5 = SetA5(p->ob_A5); + p->ob_cmd = *cmd; + Py_AddPendingCall(SndCh_CallCallBack, (void *)p); + SetA5(A5); + } } /* SPB callbacks - Schedule callbacks to Python */ static int SPB_CallCallBack(void *arg) { - SPBObject *p = (SPBObject *)arg; - PyObject *args; - PyObject *res; - - if ( p->ob_thiscallback == 0 ) return 0; - args = Py_BuildValue("(O)", p); - res = PyEval_CallObject(p->ob_thiscallback, args); - p->ob_thiscallback = 0; - Py_DECREF(args); - if (res == NULL) - return -1; - Py_DECREF(res); - return 0; + SPBObject *p = (SPBObject *)arg; + PyObject *args; + PyObject *res; + + if ( p->ob_thiscallback == 0 ) return 0; + args = Py_BuildValue("(O)", p); + res = PyEval_CallObject(p->ob_thiscallback, args); + p->ob_thiscallback = 0; + Py_DECREF(args); + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; } static pascal void SPB_completion(SPBPtr my_spb) { - SPBObject *p = (SPBObject *)(my_spb->userLong); - - if (p && p->ob_completion) { - long A5 = SetA5(p->ob_A5); - p->ob_thiscallback = p->ob_completion; /* Hope we cannot get two at the same time */ - Py_AddPendingCall(SPB_CallCallBack, (void *)p); - SetA5(A5); - } + SPBObject *p = (SPBObject *)(my_spb->userLong); + + if (p && p->ob_completion) { + long A5 = SetA5(p->ob_A5); + p->ob_thiscallback = p->ob_completion; /* Hope we cannot get two at the same time */ + Py_AddPendingCall(SPB_CallCallBack, (void *)p); + SetA5(A5); + } } From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/OSX fixapplepython23.py, 1.3.6.1, 1.3.6.2 Message-ID: <20051016052436.B1C031E4011@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/OSX In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/OSX Modified Files: Tag: ast-branch fixapplepython23.py Log Message: Merge head to branch (for the last time) Index: 
fixapplepython23.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/OSX/fixapplepython23.py,v retrieving revision 1.3.6.1 retrieving revision 1.3.6.2 diff -u -d -r1.3.6.1 -r1.3.6.2 --- fixapplepython23.py 7 Jan 2005 07:02:06 -0000 1.3.6.1 +++ fixapplepython23.py 16 Oct 2005 05:24:03 -0000 1.3.6.2 @@ -43,12 +43,12 @@ if lines[i][:len(start)] == start: return i return -1 - + def fix(makefile, do_apply): """Fix the Makefile, if required.""" fixed = False lines = open(makefile).readlines() - + for old, new in CHANGES: i = findline(lines, new) if i >= 0: @@ -61,7 +61,7 @@ return 2 lines[i] = new fixed = True - + if fixed: if do_apply: print 'fixapplepython23: Fix to Apple-installed Python 2.3 applied' @@ -74,7 +74,7 @@ else: print 'fixapplepython23: No fix needed, appears to have been applied before' return 0 - + def makescript(filename, compiler): """Create a wrapper script for a compiler""" dirname = os.path.split(filename)[0] @@ -85,7 +85,7 @@ fp.close() os.chmod(filename, 0755) print 'fixapplepython23: Created', filename - + def main(): # Check for -n option if len(sys.argv) > 1 and sys.argv[1] == '-n': @@ -113,7 +113,6 @@ # Finally fix the makefile rv = fix(MAKEFILE, do_apply) sys.exit(rv) - + if __name__ == '__main__': main() - From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/res _Resmodule.c, 1.12.2.2, 1.12.2.3 ressupport.py, 1.21.2.2, 1.21.2.3 Message-ID: <20051016052436.955661E4017@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/res In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/res Modified Files: Tag: ast-branch _Resmodule.c ressupport.py Log Message: Merge head to branch (for the last time) Index: _Resmodule.c 
=================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/res/_Resmodule.c,v retrieving revision 1.12.2.2 retrieving revision 1.12.2.3 diff -u -d -r1.12.2.2 -r1.12.2.3 --- _Resmodule.c 7 Jan 2005 07:01:44 -0000 1.12.2.2 +++ _Resmodule.c 16 Oct 2005 05:24:03 -0000 1.12.2.3 @@ -32,7 +32,7 @@ static void PyMac_AutoDisposeHandle(Handle h) { - DisposeHandle(h); + DisposeHandle(h); } static PyObject *Res_Error; @@ -59,6 +59,7 @@ it->ob_freeit = NULL; return (PyObject *)it; } + int ResObj_Convert(PyObject *v, Handle *p_itself) { if (!ResObj_Check(v)) @@ -455,13 +456,13 @@ int onoff, old = 0; if (!PyArg_ParseTuple(_args, "i", &onoff)) - return NULL; + return NULL; if ( _self->ob_freeit ) - old = 1; + old = 1; if ( onoff ) - _self->ob_freeit = PyMac_AutoDisposeHandle; + _self->ob_freeit = PyMac_AutoDisposeHandle; else - _self->ob_freeit = NULL; + _self->ob_freeit = NULL; _res = Py_BuildValue("i", old); return _res; @@ -514,42 +515,42 @@ static PyObject *ResObj_get_data(ResourceObject *self, void *closure) { - PyObject *res; - char state; + PyObject *res; + char state; - state = HGetState(self->ob_itself); - HLock(self->ob_itself); - res = PyString_FromStringAndSize( - *self->ob_itself, - GetHandleSize(self->ob_itself)); - HUnlock(self->ob_itself); - HSetState(self->ob_itself, state); - return res; - + state = HGetState(self->ob_itself); + HLock(self->ob_itself); + res = PyString_FromStringAndSize( + *self->ob_itself, + GetHandleSize(self->ob_itself)); + HUnlock(self->ob_itself); + HSetState(self->ob_itself, state); + return res; + } static int ResObj_set_data(ResourceObject *self, PyObject *v, void *closure) { - char *data; - long size; - - if ( v == NULL ) - return -1; - if ( !PyString_Check(v) ) - return -1; - size = PyString_Size(v); - data = PyString_AsString(v); - /* XXXX Do I need the GetState/SetState calls? 
*/ - SetHandleSize(self->ob_itself, size); - if ( MemError()) - return -1; - HLock(self->ob_itself); - memcpy((char *)*self->ob_itself, data, size); - HUnlock(self->ob_itself); - /* XXXX Should I do the Changed call immedeately? */ - return 0; - + char *data; + long size; + + if ( v == NULL ) + return -1; + if ( !PyString_Check(v) ) + return -1; + size = PyString_Size(v); + data = PyString_AsString(v); + /* XXXX Do I need the GetState/SetState calls? */ + SetHandleSize(self->ob_itself, size); + if ( MemError()) + return -1; + HLock(self->ob_itself); + memcpy((char *)*self->ob_itself, data, size); + HUnlock(self->ob_itself); + /* XXXX Should I do the Changed call immedeately? */ + return 0; + return 0; } @@ -572,26 +573,26 @@ #define ResObj_repr NULL #define ResObj_hash NULL -static int ResObj_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +static int ResObj_tp_init(PyObject *_self, PyObject *_args, PyObject *_kwds) { char *srcdata = NULL; int srclen = 0; Handle itself; char *kw[] = {"itself", 0}; - if (PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, ResObj_Convert, &itself)) + if (PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, ResObj_Convert, &itself)) { - ((ResourceObject *)self)->ob_itself = itself; + ((ResourceObject *)_self)->ob_itself = itself; return 0; } PyErr_Clear(); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s#", kw, &srcdata, &srclen)) return -1; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "|s#", kw, &srcdata, &srclen)) return -1; if ((itself = NewHandle(srclen)) == NULL) { PyErr_NoMemory(); return 0; } - ((ResourceObject *)self)->ob_itself = itself; + ((ResourceObject *)_self)->ob_itself = itself; if (srclen && srcdata) { HLock(itself); @@ -603,7 +604,7 @@ #define ResObj_tp_alloc PyType_GenericAlloc -static PyObject *ResObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *ResObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { PyObject *self; if ((self = type->tp_alloc(type, 0)) == 
NULL) return NULL; @@ -1573,11 +1574,11 @@ ResourceObject *rv; if (!PyArg_ParseTuple(_args, "s#", &buf, &len)) - return NULL; + return NULL; h = NewHandle(len); if ( h == NULL ) { - PyErr_NoMemory(); - return NULL; + PyErr_NoMemory(); + return NULL; } HLock(h); memcpy(*h, buf, len); @@ -1679,35 +1680,35 @@ /* Alternative version of ResObj_New, which returns None for null argument */ PyObject *OptResObj_New(Handle itself) { - if (itself == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - return ResObj_New(itself); + if (itself == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + return ResObj_New(itself); } int OptResObj_Convert(PyObject *v, Handle *p_itself) { - PyObject *tmp; - - if ( v == Py_None ) { - *p_itself = NULL; - return 1; - } - if (ResObj_Check(v)) - { - *p_itself = ((ResourceObject *)v)->ob_itself; - return 1; - } - /* If it isn't a resource yet see whether it is convertible */ - if ( (tmp=PyObject_CallMethod(v, "as_Resource", "")) ) { - *p_itself = ((ResourceObject *)tmp)->ob_itself; - Py_DECREF(tmp); - return 1; - } - PyErr_Clear(); - PyErr_SetString(PyExc_TypeError, "Resource required"); - return 0; + PyObject *tmp; + + if ( v == Py_None ) { + *p_itself = NULL; + return 1; + } + if (ResObj_Check(v)) + { + *p_itself = ((ResourceObject *)v)->ob_itself; + return 1; + } + /* If it isn't a resource yet see whether it is convertible */ + if ( (tmp=PyObject_CallMethod(v, "as_Resource", "")) ) { + *p_itself = ((ResourceObject *)tmp)->ob_itself; + Py_DECREF(tmp); + return 1; + } + PyErr_Clear(); + PyErr_SetString(PyExc_TypeError, "Resource required"); + return 0; } @@ -1718,10 +1719,10 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(Handle, ResObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Handle, ResObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(Handle, OptResObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Handle, OptResObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(Handle, ResObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Handle, ResObj_Convert); + 
PyMac_INIT_TOOLBOX_OBJECT_NEW(Handle, OptResObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Handle, OptResObj_Convert); m = Py_InitModule("_Res", Res_methods); Index: ressupport.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/res/ressupport.py,v retrieving revision 1.21.2.2 retrieving revision 1.21.2.3 diff -u -d -r1.21.2.2 -r1.21.2.3 --- ressupport.py 7 Jan 2005 07:01:44 -0000 1.21.2.2 +++ ressupport.py 16 Oct 2005 05:24:03 -0000 1.21.2.3 @@ -182,20 +182,20 @@ Output("%s itself;", self.itselftype); Output("char *kw[] = {\"itself\", 0};") Output() - Output("if (PyArg_ParseTupleAndKeywords(args, kwds, \"O&\", kw, %s_Convert, &itself))", + Output("if (PyArg_ParseTupleAndKeywords(_args, _kwds, \"O&\", kw, %s_Convert, &itself))", self.prefix); OutLbrace() - Output("((%s *)self)->ob_itself = itself;", self.objecttype) + Output("((%s *)_self)->ob_itself = itself;", self.objecttype) Output("return 0;") OutRbrace() Output("PyErr_Clear();") - Output("if (!PyArg_ParseTupleAndKeywords(args, kwds, \"|s#\", kw, &srcdata, &srclen)) return -1;") + Output("if (!PyArg_ParseTupleAndKeywords(_args, _kwds, \"|s#\", kw, &srcdata, &srclen)) return -1;") Output("if ((itself = NewHandle(srclen)) == NULL)") OutLbrace() Output("PyErr_NoMemory();") Output("return 0;") OutRbrace() - Output("((%s *)self)->ob_itself = itself;", self.objecttype) + Output("((%s *)_self)->ob_itself = itself;", self.objecttype) # XXXX Output("((%s *)self)->ob_freeit = PyMac_AutoDisposeHandle;") Output("if (srclen && srcdata)") OutLbrace() From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Tools/IDE PackageManager.py, 1.9.4.2, 1.9.4.3 Wbase.py, 1.9.8.2, 1.9.8.3 Message-ID: <20051016052436.D12531E4013@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Tools/IDE 
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Tools/IDE Modified Files: Tag: ast-branch PackageManager.py Wbase.py Log Message: Merge head to branch (for the last time) Index: PackageManager.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Tools/IDE/PackageManager.py,v retrieving revision 1.9.4.2 retrieving revision 1.9.4.3 diff -u -d -r1.9.4.2 -r1.9.4.3 --- PackageManager.py 7 Jan 2005 07:02:21 -0000 1.9.4.2 +++ PackageManager.py 16 Oct 2005 05:24:03 -0000 1.9.4.3 @@ -181,6 +181,8 @@ filename = EasyDialogs.AskFileForOpen(typeList=("TEXT",)) if filename: filename = urllib.pathname2url(filename) + if filename[:5] != 'file:': + filename = 'file:' + filename self.opendoc(filename) def domenu_openURL(self, *args): Index: Wbase.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Tools/IDE/Wbase.py,v retrieving revision 1.9.8.2 retrieving revision 1.9.8.3 diff -u -d -r1.9.8.2 -r1.9.8.3 --- Wbase.py 7 Jan 2005 07:02:22 -0000 1.9.8.2 +++ Wbase.py 16 Oct 2005 05:24:03 -0000 1.9.8.3 @@ -128,28 +128,28 @@ # depending on the values of l(eft), t(op), r(right) and b(ottom), # they mean different things: if l < -1: - # l is less than -1, this mean it measures from the *right* of it's parent + # l is less than -1, this mean it measures from the *right* of its parent l = pr + l else: - # l is -1 or greater, this mean it measures from the *left* of it's parent + # l is -1 or greater, this mean it measures from the *left* of its parent l = pl + l if t < -1: - # t is less than -1, this mean it measures from the *bottom* of it's parent + # t is less than -1, this mean it measures from the *bottom* of its parent t = pb + t else: - # t is -1 or greater, this mean it measures from the *top* of it's parent + # t is -1 or greater, this mean it measures from the *top* of its parent t = pt + t if r > 1: # r is greater than 1, this means r is 
the *width* of the widget r = l + r else: - # r is less than 1, this means it measures from the *right* of it's parent + # r is less than 1, this means it measures from the *right* of its parent r = pr + r if b > 1: # b is greater than 1, this means b is the *height* of the widget b = t + b else: - # b is less than 1, this means it measures from the *bottom* of it's parent + # b is less than 1, this means it measures from the *bottom* of its parent b = pb + b self._bounds = (l, t, r, b) if oldbounds and oldbounds <> self._bounds: From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/te _TEmodule.c, 1.9.2.2, 1.9.2.3 Message-ID: <20051016052436.B58B81E401B@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/te In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/te Modified Files: Tag: ast-branch _TEmodule.c Log Message: Merge head to branch (for the last time) Index: _TEmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/te/_TEmodule.c,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- _TEmodule.c 7 Jan 2005 07:01:46 -0000 1.9.2.2 +++ _TEmodule.c 16 Oct 2005 05:24:03 -0000 1.9.2.3 @@ -35,21 +35,21 @@ TextStyle_New(TextStylePtr itself) { - return Py_BuildValue("lllO&", (long)itself->tsFont, (long)itself->tsFace, (long)itself->tsSize, QdRGB_New, - &itself->tsColor); + return Py_BuildValue("lllO&", (long)itself->tsFont, (long)itself->tsFace, (long)itself->tsSize, QdRGB_New, + &itself->tsColor); } static int TextStyle_Convert(PyObject *v, TextStylePtr p_itself) { - long font, face, size; - - if( !PyArg_ParseTuple(v, "lllO&", &font, &face, &size, QdRGB_Convert, &p_itself->tsColor) ) - return 0; - p_itself->tsFont = (short)font; - 
p_itself->tsFace = (Style)face; - p_itself->tsSize = (short)size; - return 1; + long font, face, size; + + if( !PyArg_ParseTuple(v, "lllO&", &font, &face, &size, QdRGB_Convert, &p_itself->tsColor) ) + return 0; + p_itself->tsFont = (short)font; + p_itself->tsFace = (Style)face; + p_itself->tsSize = (short)size; + return 1; } static PyObject *TE_Error; @@ -69,14 +69,15 @@ { TEObject *it; if (itself == NULL) { - PyErr_SetString(TE_Error,"Cannot create null TE"); - return NULL; - } + PyErr_SetString(TE_Error,"Cannot create null TE"); + return NULL; + } it = PyObject_NEW(TEObject, &TE_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int TEObj_Convert(PyObject *v, TEHandle *p_itself) { if (!TEObj_Check(v)) @@ -983,16 +984,16 @@ #define TEObj_tp_alloc PyType_GenericAlloc -static PyObject *TEObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *TEObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; TEHandle itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, TEObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((TEObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, TEObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((TEObject *)_self)->ob_itself = itself; + return _self; } #define TEObj_tp_free PyObject_Del @@ -1307,8 +1308,8 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(TEHandle, TEObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(TEHandle, TEObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(TEHandle, TEObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(TEHandle, TEObj_Convert); m = Py_InitModule("_TE", TE_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:36 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:36 +0200 
(CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/win _Winmodule.c, 1.10.2.2, 1.10.2.3 Message-ID: <20051016052436.DFC751E4025@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/win In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/win Modified Files: Tag: ast-branch _Winmodule.c Log Message: Merge head to branch (for the last time) Index: _Winmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/win/_Winmodule.c,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- _Winmodule.c 7 Jan 2005 07:02:05 -0000 1.10.2.2 +++ _Winmodule.c 16 Oct 2005 05:24:03 -0000 1.10.2.3 @@ -36,7 +36,7 @@ static void PyMac_AutoDisposeWindow(WindowPtr w) { - DisposeWindow(w); + DisposeWindow(w); } static PyObject *Win_Error; @@ -69,6 +69,7 @@ } return (PyObject *)it; } + int WinObj_Convert(PyObject *v, WindowPtr *p_itself) { @@ -2309,13 +2310,13 @@ int onoff, old = 0; if (!PyArg_ParseTuple(_args, "i", &onoff)) - return NULL; + return NULL; if ( _self->ob_freeit ) - old = 1; + old = 1; if ( onoff ) - _self->ob_freeit = PyMac_AutoDisposeWindow; + _self->ob_freeit = PyMac_AutoDisposeWindow; else - _self->ob_freeit = NULL; + _self->ob_freeit = NULL; _res = Py_BuildValue("i", old); return _res; @@ -2590,16 +2591,16 @@ #define WinObj_tp_alloc PyType_GenericAlloc -static PyObject *WinObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *WinObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; WindowPtr itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, WinObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((WindowObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, WinObj_Convert, &itself)) return NULL; + 
if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((WindowObject *)_self)->ob_itself = itself; + return _self; } #define WinObj_tp_free PyObject_Del @@ -3121,7 +3122,7 @@ long ptr; if ( !PyArg_ParseTuple(_args, "i", &ptr) ) - return NULL; + return NULL; _res = WinObj_WhichWindow((WindowPtr)ptr); return _res; @@ -3208,21 +3209,21 @@ PyObject * WinObj_WhichWindow(WindowPtr w) { - PyObject *it; - - if (w == NULL) { - it = Py_None; - Py_INCREF(it); - } else { - it = (PyObject *) GetWRefCon(w); - if (it == NULL || !IsPointerValid((Ptr)it) || ((WindowObject *)it)->ob_itself != w || !WinObj_Check(it)) { - it = WinObj_New(w); - ((WindowObject *)it)->ob_freeit = NULL; - } else { - Py_INCREF(it); - } - } - return it; + PyObject *it; + + if (w == NULL) { + it = Py_None; + Py_INCREF(it); + } else { + it = (PyObject *) GetWRefCon(w); + if (it == NULL || !IsPointerValid((Ptr)it) || ((WindowObject *)it)->ob_itself != w || !WinObj_Check(it)) { + it = WinObj_New(w); + ((WindowObject *)it)->ob_freeit = NULL; + } else { + Py_INCREF(it); + } + } + return it; } @@ -3233,9 +3234,9 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(WindowPtr, WinObj_New); - PyMac_INIT_TOOLBOX_OBJECT_NEW(WindowPtr, WinObj_WhichWindow); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(WindowPtr, WinObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(WindowPtr, WinObj_New); + PyMac_INIT_TOOLBOX_OBJECT_NEW(WindowPtr, WinObj_WhichWindow); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(WindowPtr, WinObj_Convert); m = Py_InitModule("_Win", Win_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:37 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/qt _Qtmodule.c, 1.8.2.2, 1.8.2.3 Message-ID: <20051016052437.0B79A1E4027@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/qt In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/qt Modified Files: Tag: ast-branch _Qtmodule.c 
Log Message: Merge head to branch (for the last time) Index: _Qtmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/qt/_Qtmodule.c,v retrieving revision 1.8.2.2 retrieving revision 1.8.2.3 diff -u -d -r1.8.2.2 -r1.8.2.3 --- _Qtmodule.c 7 Jan 2005 07:01:41 -0000 1.8.2.2 +++ _Qtmodule.c 16 Oct 2005 05:24:02 -0000 1.8.2.3 @@ -47,45 +47,45 @@ #endif /* Macro to allow us to GetNextInterestingTime without duration */ -#define GetMediaNextInterestingTimeOnly(media, flags, time, rate, rv) GetMediaNextInterestingTime(media, flags, time, rate, rv, NULL) - +#define GetMediaNextInterestingTimeOnly(media, flags, time, rate, rv) GetMediaNextInterestingTime(media, flags, time, rate, rv, NULL) + /* ** Parse/generate time records */ static PyObject * QtTimeRecord_New(TimeRecord *itself) { - if (itself->base) - return Py_BuildValue("O&lO&", PyMac_Buildwide, &itself->value, itself->scale, - TimeBaseObj_New, itself->base); - else - return Py_BuildValue("O&lO", PyMac_Buildwide, &itself->value, itself->scale, - Py_None); + if (itself->base) + return Py_BuildValue("O&lO&", PyMac_Buildwide, &itself->value, itself->scale, + TimeBaseObj_New, itself->base); + else + return Py_BuildValue("O&lO", PyMac_Buildwide, &itself->value, itself->scale, + Py_None); } static int QtTimeRecord_Convert(PyObject *v, TimeRecord *p_itself) { - PyObject *base = NULL; - if( !PyArg_ParseTuple(v, "O&l|O", PyMac_Getwide, &p_itself->value, &p_itself->scale, - &base) ) - return 0; - if ( base == NULL || base == Py_None ) - p_itself->base = NULL; - else - if ( !TimeBaseObj_Convert(base, &p_itself->base) ) - return 0; - return 1; + PyObject *base = NULL; + if( !PyArg_ParseTuple(v, "O&l|O", PyMac_Getwide, &p_itself->value, &p_itself->scale, + &base) ) + return 0; + if ( base == NULL || base == Py_None ) + p_itself->base = NULL; + else + if ( !TimeBaseObj_Convert(base, &p_itself->base) ) + return 0; + return 1; } static int 
QtMusicMIDIPacket_Convert(PyObject *v, MusicMIDIPacket *p_itself) { - int dummy; - - if( !PyArg_ParseTuple(v, "hls#", &p_itself->length, &p_itself->reserved, p_itself->data, dummy) ) - return 0; - return 1; + int dummy; + + if( !PyArg_ParseTuple(v, "hls#", &p_itself->length, &p_itself->reserved, p_itself->data, dummy) ) + return 0; + return 1; } @@ -108,14 +108,15 @@ { IdleManagerObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create IdleManager from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create IdleManager from NULL pointer"); + return NULL; + } it = PyObject_NEW(IdleManagerObject, &IdleManager_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int IdleManagerObj_Convert(PyObject *v, IdleManager *p_itself) { if (v == Py_None) @@ -154,16 +155,16 @@ #define IdleManagerObj_tp_alloc PyType_GenericAlloc -static PyObject *IdleManagerObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *IdleManagerObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; IdleManager itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, IdleManagerObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((IdleManagerObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, IdleManagerObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((IdleManagerObject *)_self)->ob_itself = itself; + return _self; } #define IdleManagerObj_tp_free PyObject_Del @@ -231,14 +232,15 @@ { MovieControllerObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create MovieController from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create MovieController from NULL pointer"); + return NULL; + } it = PyObject_NEW(MovieControllerObject, 
&MovieController_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int MovieCtlObj_Convert(PyObject *v, MovieController *p_itself) { if (v == Py_None) @@ -1273,16 +1275,16 @@ #define MovieCtlObj_tp_alloc PyType_GenericAlloc -static PyObject *MovieCtlObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *MovieCtlObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; MovieController itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, MovieCtlObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((MovieControllerObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, MovieCtlObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((MovieControllerObject *)_self)->ob_itself = itself; + return _self; } #define MovieCtlObj_tp_free PyObject_Del @@ -1350,14 +1352,15 @@ { TimeBaseObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create TimeBase from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create TimeBase from NULL pointer"); + return NULL; + } it = PyObject_NEW(TimeBaseObject, &TimeBase_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int TimeBaseObj_Convert(PyObject *v, TimeBase *p_itself) { if (v == Py_None) @@ -1766,16 +1769,16 @@ #define TimeBaseObj_tp_alloc PyType_GenericAlloc -static PyObject *TimeBaseObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *TimeBaseObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; TimeBase itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, TimeBaseObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return 
NULL; - ((TimeBaseObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, TimeBaseObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((TimeBaseObject *)_self)->ob_itself = itself; + return _self; } #define TimeBaseObj_tp_free PyObject_Del @@ -1843,14 +1846,15 @@ { UserDataObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create UserData from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create UserData from NULL pointer"); + return NULL; + } it = PyObject_NEW(UserDataObject, &UserData_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int UserDataObj_Convert(PyObject *v, UserData *p_itself) { if (v == Py_None) @@ -2136,16 +2140,16 @@ #define UserDataObj_tp_alloc PyType_GenericAlloc -static PyObject *UserDataObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *UserDataObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; UserData itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, UserDataObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((UserDataObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, UserDataObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((UserDataObject *)_self)->ob_itself = itself; + return _self; } #define UserDataObj_tp_free PyObject_Del @@ -2213,14 +2217,15 @@ { MediaObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create Media from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create Media from NULL pointer"); + return NULL; + } it = PyObject_NEW(MediaObject, &Media_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } 
+ int MediaObj_Convert(PyObject *v, Media *p_itself) { if (v == Py_None) @@ -3377,16 +3382,16 @@ #define MediaObj_tp_alloc PyType_GenericAlloc -static PyObject *MediaObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *MediaObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; Media itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, MediaObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((MediaObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, MediaObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((MediaObject *)_self)->ob_itself = itself; + return _self; } #define MediaObj_tp_free PyObject_Del @@ -3454,14 +3459,15 @@ { TrackObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create Track from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create Track from NULL pointer"); + return NULL; + } it = PyObject_NEW(TrackObject, &Track_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int TrackObj_Convert(PyObject *v, Track *p_itself) { if (v == Py_None) @@ -4724,16 +4730,16 @@ #define TrackObj_tp_alloc PyType_GenericAlloc -static PyObject *TrackObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *TrackObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; Track itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, TrackObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((TrackObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, TrackObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 
0)) == NULL) return NULL; + ((TrackObject *)_self)->ob_itself = itself; + return _self; } #define TrackObj_tp_free PyObject_Del @@ -4801,14 +4807,15 @@ { MovieObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create Movie from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create Movie from NULL pointer"); + return NULL; + } it = PyObject_NEW(MovieObject, &Movie_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int MovieObj_Convert(PyObject *v, Movie *p_itself) { if (v == Py_None) @@ -7276,16 +7283,16 @@ #define MovieObj_tp_alloc PyType_GenericAlloc -static PyObject *MovieObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *MovieObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; Movie itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, MovieObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((MovieObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, MovieObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((MovieObject *)_self)->ob_itself = itself; + return _self; } #define MovieObj_tp_free PyObject_Del @@ -7353,14 +7360,15 @@ { SGOutputObject *it; if (itself == NULL) { - PyErr_SetString(Qt_Error,"Cannot create SGOutput from NULL pointer"); - return NULL; - } + PyErr_SetString(Qt_Error,"Cannot create SGOutput from NULL pointer"); + return NULL; + } it = PyObject_NEW(SGOutputObject, &SGOutput_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int SGOutputObj_Convert(PyObject *v, SGOutput *p_itself) { if (v == Py_None) @@ -7399,16 +7407,16 @@ #define SGOutputObj_tp_alloc PyType_GenericAlloc -static PyObject *SGOutputObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 
+static PyObject *SGOutputObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; SGOutput itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, SGOutputObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((SGOutputObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, SGOutputObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((SGOutputObject *)_self)->ob_itself = itself; + return _self; } #define SGOutputObj_tp_free PyObject_Del @@ -27993,18 +28001,18 @@ - PyMac_INIT_TOOLBOX_OBJECT_NEW(Track, TrackObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Track, TrackObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(Movie, MovieObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Movie, MovieObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(MovieController, MovieCtlObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(MovieController, MovieCtlObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(TimeBase, TimeBaseObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(TimeBase, TimeBaseObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(UserData, UserDataObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(UserData, UserDataObj_Convert); - PyMac_INIT_TOOLBOX_OBJECT_NEW(Media, MediaObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Media, MediaObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(Track, TrackObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Track, TrackObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(Movie, MovieObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Movie, MovieObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(MovieController, MovieCtlObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(MovieController, MovieCtlObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(TimeBase, TimeBaseObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(TimeBase, TimeBaseObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(UserData, 
UserDataObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(UserData, UserDataObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(Media, MediaObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(Media, MediaObj_Convert); m = Py_InitModule("_Qt", Qt_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:37 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/waste wastemodule.c, 1.23.2.2, 1.23.2.3 Message-ID: <20051016052437.47D781E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/waste In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/waste Modified Files: Tag: ast-branch wastemodule.c Log Message: Merge head to branch (for the last time) Index: wastemodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/waste/wastemodule.c,v retrieving revision 1.23.2.2 retrieving revision 1.23.2.3 diff -u -d -r1.23.2.2 -r1.23.2.3 --- wastemodule.c 7 Jan 2005 07:02:04 -0000 1.23.2.2 +++ wastemodule.c 16 Oct 2005 05:24:03 -0000 1.23.2.3 @@ -38,21 +38,21 @@ TextStyle_New(TextStylePtr itself) { - return Py_BuildValue("lllO&", (long)itself->tsFont, (long)itself->tsFace, (long)itself->tsSize, QdRGB_New, - &itself->tsColor); + return Py_BuildValue("lllO&", (long)itself->tsFont, (long)itself->tsFace, (long)itself->tsSize, QdRGB_New, + &itself->tsColor); } static int TextStyle_Convert(PyObject *v, TextStylePtr p_itself) { - long font, face, size; - - if( !PyArg_ParseTuple(v, "lllO&", &font, &face, &size, QdRGB_Convert, &p_itself->tsColor) ) - return 0; - p_itself->tsFont = (short)font; - p_itself->tsFace = (Style)face; - p_itself->tsSize = (short)size; - return 1; + long font, face, size; + + if( !PyArg_ParseTuple(v, "lllO&", &font, &face, &size, QdRGB_Convert, &p_itself->tsColor) ) + return 0; + p_itself->tsFont = (short)font; + p_itself->tsFace = 
(Style)face; + p_itself->tsSize = (short)size; + return 1; } /* @@ -62,33 +62,33 @@ RunInfo_New(WERunInfo *itself) { - return Py_BuildValue("llhhO&O&", itself->runStart, itself->runEnd, itself->runHeight, - itself->runAscent, TextStyle_New, &itself->runStyle, WEOObj_New, itself->runObject); + return Py_BuildValue("llhhO&O&", itself->runStart, itself->runEnd, itself->runHeight, + itself->runAscent, TextStyle_New, &itself->runStyle, WEOObj_New, itself->runObject); } /* Conversion of long points and rects */ int LongRect_Convert(PyObject *v, LongRect *r) { - return PyArg_Parse(v, "(llll)", &r->left, &r->top, &r->right, &r->bottom); + return PyArg_Parse(v, "(llll)", &r->left, &r->top, &r->right, &r->bottom); } PyObject * LongRect_New(LongRect *r) { - return Py_BuildValue("(llll)", r->left, r->top, r->right, r->bottom); + return Py_BuildValue("(llll)", r->left, r->top, r->right, r->bottom); } int LongPt_Convert(PyObject *v, LongPt *p) { - return PyArg_Parse(v, "(ll)", &p->h, &p->v); + return PyArg_Parse(v, "(ll)", &p->h, &p->v); } PyObject * LongPt_New(LongPt *p) { - return Py_BuildValue("(ll)", p->h, p->v); + return Py_BuildValue("(ll)", p->h, p->v); } /* Stuff for the callbacks: */ @@ -101,110 +101,110 @@ static OSErr any_handler(WESelector what, WEObjectReference who, PyObject *args, PyObject **rv) { - FlavorType tp; - PyObject *key, *func; - - if ( args == NULL ) return errAECorruptData; - - tp = WEGetObjectType(who); - - if( (key=Py_BuildValue("O&O&", PyMac_BuildOSType, tp, PyMac_BuildOSType, what)) == NULL) - return errAECorruptData; - if( (func = PyDict_GetItem(callbackdict, key)) == NULL ) { - Py_DECREF(key); - return errAEHandlerNotFound; - } - Py_INCREF(func); - *rv = PyEval_CallObject(func, args); - Py_DECREF(func); - Py_DECREF(key); - if ( *rv == NULL ) { - PySys_WriteStderr("--Exception in callback: "); - PyErr_Print(); - return errAEReplyNotArrived; - } - return 0; + FlavorType tp; + PyObject *key, *func; + + if ( args == NULL ) return errAECorruptData; + 
+ tp = WEGetObjectType(who); + + if( (key=Py_BuildValue("O&O&", PyMac_BuildOSType, tp, PyMac_BuildOSType, what)) == NULL) + return errAECorruptData; + if( (func = PyDict_GetItem(callbackdict, key)) == NULL ) { + Py_DECREF(key); + return errAEHandlerNotFound; + } + Py_INCREF(func); + *rv = PyEval_CallObject(func, args); + Py_DECREF(func); + Py_DECREF(key); + if ( *rv == NULL ) { + PySys_WriteStderr("--Exception in callback: "); + PyErr_Print(); + return errAEReplyNotArrived; + } + return 0; } static pascal OSErr my_new_handler(Point *objectSize, WEObjectReference objref) { - PyObject *args=NULL, *rv=NULL; - OSErr err; - - args=Py_BuildValue("(O&)", WEOObj_New, objref); - err = any_handler(weNewHandler, objref, args, &rv); - if (!err) { - if (!PyMac_GetPoint(rv, objectSize) ) - err = errAECoercionFail; - } - if ( args ) { - Py_DECREF(args); - } - if ( rv ) { - Py_DECREF(rv); - } - return err; + PyObject *args=NULL, *rv=NULL; + OSErr err; + + args=Py_BuildValue("(O&)", WEOObj_New, objref); + err = any_handler(weNewHandler, objref, args, &rv); + if (!err) { + if (!PyMac_GetPoint(rv, objectSize) ) + err = errAECoercionFail; + } + if ( args ) { + Py_DECREF(args); + } + if ( rv ) { + Py_DECREF(rv); + } + return err; } static pascal OSErr my_dispose_handler(WEObjectReference objref) { - PyObject *args=NULL, *rv=NULL; - OSErr err; - - args=Py_BuildValue("(O&)", WEOObj_New, objref); - err = any_handler(weDisposeHandler, objref, args, &rv); - if ( args ) { - Py_DECREF(args); - } - if ( rv ) { - Py_DECREF(rv); - } - return err; + PyObject *args=NULL, *rv=NULL; + OSErr err; + + args=Py_BuildValue("(O&)", WEOObj_New, objref); + err = any_handler(weDisposeHandler, objref, args, &rv); + if ( args ) { + Py_DECREF(args); + } + if ( rv ) { + Py_DECREF(rv); + } + return err; } static pascal OSErr my_draw_handler(const Rect *destRect, WEObjectReference objref) { - PyObject *args=NULL, *rv=NULL; - OSErr err; - - args=Py_BuildValue("O&O&", PyMac_BuildRect, destRect, WEOObj_New, objref); 
- err = any_handler(weDrawHandler, objref, args, &rv); - if ( args ) { - Py_DECREF(args); - } - if ( rv ) { - Py_DECREF(rv); - } - return err; + PyObject *args=NULL, *rv=NULL; + OSErr err; + + args=Py_BuildValue("O&O&", PyMac_BuildRect, destRect, WEOObj_New, objref); + err = any_handler(weDrawHandler, objref, args, &rv); + if ( args ) { + Py_DECREF(args); + } + if ( rv ) { + Py_DECREF(rv); + } + return err; } static pascal Boolean my_click_handler(Point hitPt, EventModifiers modifiers, - unsigned long clickTime, WEObjectReference objref) + unsigned long clickTime, WEObjectReference objref) { - PyObject *args=NULL, *rv=NULL; - int retvalue; - OSErr err; - - args=Py_BuildValue("O&llO&", PyMac_BuildPoint, hitPt, - (long)modifiers, (long)clickTime, WEOObj_New, objref); - err = any_handler(weClickHandler, objref, args, &rv); - if (!err) - retvalue = PyInt_AsLong(rv); - else - retvalue = 0; - if ( args ) { - Py_DECREF(args); - } - if ( rv ) { - Py_DECREF(rv); - } - return retvalue; + PyObject *args=NULL, *rv=NULL; + int retvalue; + OSErr err; + + args=Py_BuildValue("O&llO&", PyMac_BuildPoint, hitPt, + (long)modifiers, (long)clickTime, WEOObj_New, objref); + err = any_handler(weClickHandler, objref, args, &rv); + if (!err) + retvalue = PyInt_AsLong(rv); + else + retvalue = 0; + if ( args ) { + Py_DECREF(args); + } + if ( rv ) { + Py_DECREF(rv); + } + return retvalue; } - + static PyObject *waste_Error; @@ -224,14 +224,15 @@ { WEOObject *it; if (itself == NULL) { - Py_INCREF(Py_None); - return Py_None; - } + Py_INCREF(Py_None); + return Py_None; + } it = PyObject_NEW(WEOObject, &WEO_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int WEOObj_Convert(PyObject *v, WEObjectReference *p_itself) { if (!WEOObj_Check(v)) @@ -400,16 +401,16 @@ #define WEOObj_tp_alloc PyType_GenericAlloc -static PyObject *WEOObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *WEOObj_tp_new(PyTypeObject *type, PyObject *_args, 
PyObject *_kwds) { - PyObject *self; + PyObject *_self; WEObjectReference itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, WEOObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((WEOObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, WEOObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((WEOObject *)_self)->ob_itself = itself; + return _self; } #define WEOObj_tp_free PyObject_Del @@ -477,15 +478,16 @@ { wasteObject *it; if (itself == NULL) { - PyErr_SetString(waste_Error,"Cannot create null WE"); - return NULL; - } + PyErr_SetString(waste_Error,"Cannot create null WE"); + return NULL; + } it = PyObject_NEW(wasteObject, &waste_Type); if (it == NULL) return NULL; it->ob_itself = itself; WESetInfo(weRefCon, (void *)&it, itself); return (PyObject *)it; } + int wasteObj_Convert(PyObject *v, WEReference *p_itself) { if (!wasteObj_Check(v)) @@ -2149,16 +2151,16 @@ #define wasteObj_tp_alloc PyType_GenericAlloc -static PyObject *wasteObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *wasteObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; WEReference itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, wasteObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((wasteObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, wasteObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((wasteObject *)_self)->ob_itself = itself; + return _self; } #define wasteObj_tp_free PyObject_Del @@ -2418,42 +2420,42 @@ { PyObject *_res = NULL; - OSErr err; - // install the sample object handlers for pictures and sounds -#define 
kTypePicture 'PICT' -#define kTypeSound 'snd ' - - if ( !PyArg_ParseTuple(_args, "") ) return NULL; - - if ((err = WEInstallObjectHandler(kTypePicture, weNewHandler, - (UniversalProcPtr) NewWENewObjectProc(HandleNewPicture), NULL)) != noErr) - goto cleanup; - - if ((err = WEInstallObjectHandler(kTypePicture, weDisposeHandler, - (UniversalProcPtr) NewWEDisposeObjectProc(HandleDisposePicture), NULL)) != noErr) - goto cleanup; - - if ((err = WEInstallObjectHandler(kTypePicture, weDrawHandler, - (UniversalProcPtr) NewWEDrawObjectProc(HandleDrawPicture), NULL)) != noErr) - goto cleanup; - - if ((err = WEInstallObjectHandler(kTypeSound, weNewHandler, - (UniversalProcPtr) NewWENewObjectProc(HandleNewSound), NULL)) != noErr) - goto cleanup; - - if ((err = WEInstallObjectHandler(kTypeSound, weDrawHandler, - (UniversalProcPtr) NewWEDrawObjectProc(HandleDrawSound), NULL)) != noErr) - goto cleanup; - - if ((err = WEInstallObjectHandler(kTypeSound, weClickHandler, - (UniversalProcPtr) NewWEClickObjectProc(HandleClickSound), NULL)) != noErr) - goto cleanup; - Py_INCREF(Py_None); - _res = Py_None; - return _res; - + OSErr err; + // install the sample object handlers for pictures and sounds +#define kTypePicture 'PICT' +#define kTypeSound 'snd ' + + if ( !PyArg_ParseTuple(_args, "") ) return NULL; + + if ((err = WEInstallObjectHandler(kTypePicture, weNewHandler, + (UniversalProcPtr) NewWENewObjectProc(HandleNewPicture), NULL)) != noErr) + goto cleanup; + + if ((err = WEInstallObjectHandler(kTypePicture, weDisposeHandler, + (UniversalProcPtr) NewWEDisposeObjectProc(HandleDisposePicture), NULL)) != noErr) + goto cleanup; + + if ((err = WEInstallObjectHandler(kTypePicture, weDrawHandler, + (UniversalProcPtr) NewWEDrawObjectProc(HandleDrawPicture), NULL)) != noErr) + goto cleanup; + + if ((err = WEInstallObjectHandler(kTypeSound, weNewHandler, + (UniversalProcPtr) NewWENewObjectProc(HandleNewSound), NULL)) != noErr) + goto cleanup; + + if ((err = WEInstallObjectHandler(kTypeSound, 
weDrawHandler, + (UniversalProcPtr) NewWEDrawObjectProc(HandleDrawSound), NULL)) != noErr) + goto cleanup; + + if ((err = WEInstallObjectHandler(kTypeSound, weClickHandler, + (UniversalProcPtr) NewWEClickObjectProc(HandleClickSound), NULL)) != noErr) + goto cleanup; + Py_INCREF(Py_None); + _res = Py_None; + return _res; + cleanup: - return PyMac_Error(err); + return PyMac_Error(err); } @@ -2461,39 +2463,39 @@ { PyObject *_res = NULL; - OSErr err; - FlavorType objectType; - WESelector selector; - PyObject *py_handler; - UniversalProcPtr handler; - WEReference we = NULL; - PyObject *key; - - - if ( !PyArg_ParseTuple(_args, "O&O&O|O&", - PyMac_GetOSType, &objectType, - PyMac_GetOSType, &selector, - &py_handler, - WEOObj_Convert, &we) ) return NULL; - - if ( selector == weNewHandler ) handler = (UniversalProcPtr)upp_new_handler; - else if ( selector == weDisposeHandler ) handler = (UniversalProcPtr)upp_dispose_handler; - else if ( selector == weDrawHandler ) handler = (UniversalProcPtr)upp_draw_handler; - else if ( selector == weClickHandler ) handler = (UniversalProcPtr)upp_click_handler; - else return PyMac_Error(weUndefinedSelectorErr); - - if ((key = Py_BuildValue("O&O&", - PyMac_BuildOSType, objectType, - PyMac_BuildOSType, selector)) == NULL ) - return NULL; - - PyDict_SetItem(callbackdict, key, py_handler); - - err = WEInstallObjectHandler(objectType, selector, handler, we); - if ( err ) return PyMac_Error(err); - Py_INCREF(Py_None); - _res = Py_None; - return _res; + OSErr err; + FlavorType objectType; + WESelector selector; + PyObject *py_handler; + UniversalProcPtr handler; + WEReference we = NULL; + PyObject *key; + + + if ( !PyArg_ParseTuple(_args, "O&O&O|O&", + PyMac_GetOSType, &objectType, + PyMac_GetOSType, &selector, + &py_handler, + WEOObj_Convert, &we) ) return NULL; + + if ( selector == weNewHandler ) handler = (UniversalProcPtr)upp_new_handler; + else if ( selector == weDisposeHandler ) handler = (UniversalProcPtr)upp_dispose_handler; + else if ( 
selector == weDrawHandler ) handler = (UniversalProcPtr)upp_draw_handler; + else if ( selector == weClickHandler ) handler = (UniversalProcPtr)upp_click_handler; + else return PyMac_Error(weUndefinedSelectorErr); + + if ((key = Py_BuildValue("O&O&", + PyMac_BuildOSType, objectType, + PyMac_BuildOSType, selector)) == NULL ) + return NULL; + + PyDict_SetItem(callbackdict, key, py_handler); + + err = WEInstallObjectHandler(objectType, selector, handler, we); + if ( err ) return PyMac_Error(err); + Py_INCREF(Py_None); + _res = Py_None; + return _res; } @@ -2535,18 +2537,18 @@ PyObject * ExistingwasteObj_New(w) - WEReference w; + WEReference w; { - PyObject *it = NULL; - - if (w == NULL) - it = NULL; - else - WEGetInfo(weRefCon, (void *)&it, w); - if (it == NULL || ((wasteObject *)it)->ob_itself != w) - it = Py_None; - Py_INCREF(it); - return it; + PyObject *it = NULL; + + if (w == NULL) + it = NULL; + else + WEGetInfo(weRefCon, (void *)&it, w); + if (it == NULL || ((wasteObject *)it)->ob_itself != w) + it = Py_None; + Py_INCREF(it); + return it; } @@ -2579,13 +2581,13 @@ Py_INCREF(&waste_Type); PyModule_AddObject(m, "wasteType", (PyObject *)&waste_Type); - callbackdict = PyDict_New(); - if (callbackdict == NULL || PyDict_SetItemString(d, "callbacks", callbackdict) != 0) - return; - upp_new_handler = NewWENewObjectProc(my_new_handler); - upp_dispose_handler = NewWEDisposeObjectProc(my_dispose_handler); - upp_draw_handler = NewWEDrawObjectProc(my_draw_handler); - upp_click_handler = NewWEClickObjectProc(my_click_handler); + callbackdict = PyDict_New(); + if (callbackdict == NULL || PyDict_SetItemString(d, "callbacks", callbackdict) != 0) + return; + upp_new_handler = NewWENewObjectProc(my_new_handler); + upp_dispose_handler = NewWEDisposeObjectProc(my_dispose_handler); + upp_draw_handler = NewWEDrawObjectProc(my_draw_handler); + upp_click_handler = NewWEClickObjectProc(my_click_handler); } From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at 
users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/osa _OSAmodule.c, 1.3.6.1, 1.3.6.2 Message-ID: <20051016052435.CAF621E4020@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/osa In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/osa Modified Files: Tag: ast-branch _OSAmodule.c Log Message: Merge head to branch (for the last time) Index: _OSAmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/osa/_OSAmodule.c,v retrieving revision 1.3.6.1 retrieving revision 1.3.6.2 diff -u -d -r1.3.6.1 -r1.3.6.2 --- _OSAmodule.c 7 Jan 2005 07:01:38 -0000 1.3.6.1 +++ _OSAmodule.c 16 Oct 2005 05:24:02 -0000 1.3.6.2 @@ -45,21 +45,22 @@ { OSAComponentInstanceObject *it; if (itself == NULL) { - PyErr_SetString(OSA_Error,"NULL ComponentInstance"); - return NULL; - } + PyErr_SetString(OSA_Error,"NULL ComponentInstance"); + return NULL; + } it = PyObject_NEW(OSAComponentInstanceObject, &OSAComponentInstance_Type); if (it == NULL) return NULL; it->ob_itself = itself; return (PyObject *)it; } + int OSAObj_Convert(PyObject *v, ComponentInstance *p_itself) { - if (CmpInstObj_Convert(v, p_itself)) - return 1; - PyErr_Clear(); - + if (CmpInstObj_Convert(v, p_itself)) + return 1; + PyErr_Clear(); + if (!OSAObj_Check(v)) { PyErr_SetString(PyExc_TypeError, "OSAComponentInstance required"); @@ -1133,16 +1134,16 @@ #define OSAObj_tp_alloc PyType_GenericAlloc -static PyObject *OSAObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *OSAObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; ComponentInstance itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, OSAObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - 
((OSAComponentInstanceObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, OSAObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((OSAComponentInstanceObject *)_self)->ob_itself = itself; + return _self; } #define OSAObj_tp_free PyObject_Del @@ -1210,8 +1211,8 @@ /* - PyMac_INIT_TOOLBOX_OBJECT_NEW(ComponentInstance, OSAObj_New); - PyMac_INIT_TOOLBOX_OBJECT_CONVERT(ComponentInstance, OSAObj_Convert); + PyMac_INIT_TOOLBOX_OBJECT_NEW(ComponentInstance, OSAObj_New); + PyMac_INIT_TOOLBOX_OBJECT_CONVERT(ComponentInstance, OSAObj_Convert); */ From jhylton at users.sourceforge.net Sun Oct 16 07:24:35 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Mac/Modules/mlte _Mltemodule.c, 1.10.2.2, 1.10.2.3 Message-ID: <20051016052435.D74961E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Mac/Modules/mlte In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Mac/Modules/mlte Modified Files: Tag: ast-branch _Mltemodule.c Log Message: Merge head to branch (for the last time) Index: _Mltemodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Modules/mlte/_Mltemodule.c,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- _Mltemodule.c 7 Jan 2005 07:01:30 -0000 1.10.2.2 +++ _Mltemodule.c 16 Oct 2005 05:24:02 -0000 1.10.2.3 @@ -38,14 +38,14 @@ static int OptFSSpecPtr_Convert(PyObject *v, FSSpec **p_itself) { - static FSSpec fss; - if (v == Py_None) - { - *p_itself = NULL; - return 1; - } - *p_itself = &fss; - return PyMac_GetFSSpec(v, *p_itself); + static FSSpec fss; + if (v == Py_None) + { + *p_itself = NULL; + return 1; + } + *p_itself = &fss; + return PyMac_GetFSSpec(v, *p_itself); } /* @@ -54,15 +54,15 @@ static int 
OptRectPtr_Convert(PyObject *v, Rect **p_itself) { - static Rect r; - - if (v == Py_None) - { - *p_itself = NULL; - return 1; - } - *p_itself = &r; - return PyMac_GetRect(v, *p_itself); + static Rect r; + + if (v == Py_None) + { + *p_itself = NULL; + return 1; + } + *p_itself = &r; + return PyMac_GetRect(v, *p_itself); } /* @@ -70,13 +70,13 @@ */ static int OptGWorldObj_Convert(PyObject *v, GWorldPtr *p_itself) -{ - if (v == Py_None) - { - *p_itself = NULL; - return 1; - } - return GWorldObj_Convert(v, p_itself); +{ + if (v == Py_None) + { + *p_itself = NULL; + return 1; + } + return GWorldObj_Convert(v, p_itself); } @@ -102,6 +102,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int TXNObj_Convert(PyObject *v, TXNObject *p_itself) { if (!TXNObj_Check(v)) @@ -1255,16 +1256,16 @@ #define TXNObj_tp_alloc PyType_GenericAlloc -static PyObject *TXNObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *TXNObj_tp_new(PyTypeObject *type, PyObject *_args, PyObject *_kwds) { - PyObject *self; + PyObject *_self; TXNObject itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, TXNObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((TXNObjectObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, TXNObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((TXNObjectObject *)_self)->ob_itself = itself; + return _self; } #define TXNObj_tp_free PyObject_Del @@ -1337,6 +1338,7 @@ it->ob_itself = itself; return (PyObject *)it; } + int TXNFontMenuObj_Convert(PyObject *v, TXNFontMenuObject *p_itself) { if (!TXNFontMenuObj_Check(v)) @@ -1408,16 +1410,16 @@ #define TXNFontMenuObj_tp_alloc PyType_GenericAlloc -static PyObject *TXNFontMenuObj_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static PyObject *TXNFontMenuObj_tp_new(PyTypeObject *type, PyObject *_args, 
PyObject *_kwds) { - PyObject *self; + PyObject *_self; TXNFontMenuObject itself; char *kw[] = {"itself", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kw, TXNFontMenuObj_Convert, &itself)) return NULL; - if ((self = type->tp_alloc(type, 0)) == NULL) return NULL; - ((TXNFontMenuObjectObject *)self)->ob_itself = itself; - return self; + if (!PyArg_ParseTupleAndKeywords(_args, _kwds, "O&", kw, TXNFontMenuObj_Convert, &itself)) return NULL; + if ((_self = type->tp_alloc(type, 0)) == NULL) return NULL; + ((TXNFontMenuObjectObject *)_self)->ob_itself = itself; + return _self; } #define TXNFontMenuObj_tp_free PyObject_Del @@ -1626,7 +1628,7 @@ TXNInitOptions iUsageFlags; PyMac_PRECHECK(TXNInitTextension); if (!PyArg_ParseTuple(_args, "l", &iUsageFlags)) - return NULL; + return NULL; _err = TXNInitTextension(iDefaultFonts, iCountDefaultFonts, iUsageFlags); @@ -1667,7 +1669,7 @@ - // PyMac_INIT_TOOLBOX_OBJECT_NEW(xxxx); + // PyMac_INIT_TOOLBOX_OBJECT_NEW(xxxx); m = Py_InitModule("_Mlte", Mlte_methods); From jhylton at users.sourceforge.net Sun Oct 16 07:24:37 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules/cjkcodecs _codecs_jp.c, 1.1.6.1, 1.1.6.2 cjkcodecs.h, 1.5.4.1, 1.5.4.2 multibytecodec.c, 1.3.4.1, 1.3.4.2 Message-ID: <20051016052437.C85761E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules/cjkcodecs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Modules/cjkcodecs Modified Files: Tag: ast-branch _codecs_jp.c cjkcodecs.h multibytecodec.c Log Message: Merge head to branch (for the last time) Index: _codecs_jp.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/cjkcodecs/_codecs_jp.c,v retrieving revision 1.1.6.1 retrieving revision 1.1.6.2 diff -u -d -r1.1.6.1 -r1.1.6.2 --- _codecs_jp.c 7 Jan 2005 07:03:32 -0000 1.1.6.1 +++ _codecs_jp.c 16 
Oct 2005 05:24:04 -0000 1.1.6.2 @@ -36,7 +36,7 @@ continue; } else if (c >= 0xf8f0 && c <= 0xf8f3) { - /* Windows compatability */ + /* Windows compatibility */ REQUIRE_OUTBUF(1) if (c == 0xf8f0) OUT1(0xa0) Index: cjkcodecs.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/cjkcodecs/cjkcodecs.h,v retrieving revision 1.5.4.1 retrieving revision 1.5.4.2 diff -u -d -r1.5.4.1 -r1.5.4.2 --- cjkcodecs.h 7 Jan 2005 07:03:35 -0000 1.5.4.1 +++ cjkcodecs.h 16 Oct 2005 05:24:04 -0000 1.5.4.2 @@ -12,7 +12,10 @@ #include "multibytecodec.h" -#define UNIINV Py_UNICODE_REPLACEMENT_CHARACTER +/* a unicode "undefined" codepoint */ +#define UNIINV 0xFFFE + +/* internal-use DBCS codepoints which aren't used by any charsets */ #define NOCHAR 0xFFFF #define MULTIC 0xFFFE #define DBCINV 0xFFFD Index: multibytecodec.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/cjkcodecs/multibytecodec.c,v retrieving revision 1.3.4.1 retrieving revision 1.3.4.2 diff -u -d -r1.3.4.1 -r1.3.4.2 --- multibytecodec.c 7 Jan 2005 07:03:40 -0000 1.3.4.1 +++ multibytecodec.c 16 Oct 2005 05:24:04 -0000 1.3.4.2 @@ -1265,6 +1265,10 @@ void init_multibytecodec(void) { + MultibyteCodec_Type.ob_type = &PyType_Type; + MultibyteStreamReader_Type.ob_type = &PyType_Type; + MultibyteStreamWriter_Type.ob_type = &PyType_Type; + Py_InitModule("_multibytecodec", __methods); if (PyErr_Occurred()) From jhylton at users.sourceforge.net Sun Oct 16 07:24:37 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libfunctional.tex, NONE, 1.4.2.2 libhashlib.tex, NONE, 1.1.2.2 libreconvert.tex, NONE, 1.1.2.2 libspwd.tex, NONE, 1.2.2.2 libzipimport.tex, NONE, 1.1.6.2 asttable.tex, 1.1.14.1, 1.1.14.2 email.tex, 1.11.2.2, 1.11.2.3 emailutil.tex, 1.6.2.2, 1.6.2.3 lib.tex, 1.202.2.2, 
1.202.2.3 libasyncore.tex, 1.13.2.2, 1.13.2.3 libaudioop.tex, 1.20, 1.20.8.1 libbinascii.tex, 1.23, 1.23.8.1 libbsddb.tex, 1.7.2.2, 1.7.2.3 libcgi.tex, 1.36.2.2, 1.36.2.3 libcodecs.tex, 1.10.2.2, 1.10.2.3 libcollections.tex, 1.10.6.1, 1.10.6.2 libcookie.tex, 1.7.8.2, 1.7.8.3 libcsv.tex, 1.4.4.2, 1.4.4.3 libcurses.tex, 1.39.2.2, 1.39.2.3 libdatetime.tex, 1.44.6.2, 1.44.6.3 libdecimal.tex, 1.24.4.1, 1.24.4.2 libdis.tex, 1.37.2.2, 1.37.2.3 libfcntl.tex, 1.29.2.2, 1.29.2.3 libfuncs.tex, 1.109.2.2, 1.109.2.3 libgc.tex, 1.10.2.2, 1.10.2.3 libglob.tex, 1.12.26.1, 1.12.26.2 libgrp.tex, 1.16, 1.16.2.1 libhmac.tex, 1.1, 1.1.14.1 libhttplib.tex, 1.31.2.2, 1.31.2.3 libimaplib.tex, 1.20.2.2, 1.20.2.3 libimghdr.tex, 1.13, 1.13.26.1 libimp.tex, 1.32.2.2, 1.32.2.3 libitertools.tex, 1.7.4.2, 1.7.4.3 liblogging.tex, 1.8.6.2, 1.8.6.3 libmarshal.tex, 1.22.10.1, 1.22.10.2 libmd5.tex, 1.21, 1.21.10.1 libmmap.tex, 1.8.8.1, 1.8.8.2 libnew.tex, 1.9.2.1, 1.9.2.2 libnntplib.tex, 1.27.12.2, 1.27.12.3 liboperator.tex, 1.21.10.2, 1.21.10.3 libos.tex, 1.91.2.2, 1.91.2.3 libossaudiodev.tex, 1.6.4.2, 1.6.4.3 libpoplib.tex, 1.14.8.2, 1.14.8.3 libposixpath.tex, 1.23.2.2, 1.23.2.3 libpwd.tex, 1.14, 1.14.2.1 librandom.tex, 1.30.2.2, 1.30.2.3 libre.tex, 1.84.2.2, 1.84.2.3 libsets.tex, 1.11.6.2, 1.11.6.3 libsha.tex, 1.10.10.1, 1.10.10.2 libshelve.tex, 1.14.26.2, 1.14.26.3 libshlex.tex, 1.12.20.2, 1.12.20.3 libshutil.tex, 1.10.2.2, 1.10.2.3 libsimplexmlrpc.tex, 1.3.8.2, 1.3.8.3 libsite.tex, 1.23.2.2, 1.23.2.3 libsmtplib.tex, 1.22.2.2, 1.22.2.3 libsocket.tex, 1.68.2.2, 1.68.2.3 libsocksvr.tex, 1.14.24.2, 1.14.24.3 libstdtypes.tex, 1.98.2.2, 1.98.2.3 libstdwin.tex, 1.25.10.1, 1.25.10.2 libstringio.tex, 1.6.20.2, 1.6.20.3 libsubprocess.tex, 1.5.2.1, 1.5.2.2 libtarfile.tex, 1.1.6.2, 1.1.6.3 libtempfile.tex, 1.17.8.2, 1.17.8.3 libthread.tex, 1.25.2.2, 1.25.2.3 libthreading.tex, 1.12.2.2, 1.12.2.3 libtokenize.tex, 1.5, 1.5.20.1 libunicodedata.tex, 1.3.24.2, 1.3.24.3 liburllib.tex, 1.43.2.2, 1.43.2.3 
liburllib2.tex, 1.6.10.2, 1.6.10.3 liburlparse.tex, 1.20.8.1, 1.20.8.2 libuserdict.tex, 1.21.2.2, 1.21.2.3 libuu.tex, 1.11.18.2, 1.11.18.3 libweakref.tex, 1.17.8.2, 1.17.8.3 libwebbrowser.tex, 1.10, 1.10.18.1 libxmlrpclib.tex, 1.9.2.2, 1.9.2.3 libzlib.tex, 1.27.12.1, 1.27.12.2 tkinter.tex, 1.12.2.2, 1.12.2.3 xmldom.tex, 1.19.8.2, 1.19.8.3 Message-ID: <20051016052437.D2AA91E4015@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Doc/lib Modified Files: Tag: ast-branch asttable.tex email.tex emailutil.tex lib.tex libasyncore.tex libaudioop.tex libbinascii.tex libbsddb.tex libcgi.tex libcodecs.tex libcollections.tex libcookie.tex libcsv.tex libcurses.tex libdatetime.tex libdecimal.tex libdis.tex libfcntl.tex libfuncs.tex libgc.tex libglob.tex libgrp.tex libhmac.tex libhttplib.tex libimaplib.tex libimghdr.tex libimp.tex libitertools.tex liblogging.tex libmarshal.tex libmd5.tex libmmap.tex libnew.tex libnntplib.tex liboperator.tex libos.tex libossaudiodev.tex libpoplib.tex libposixpath.tex libpwd.tex librandom.tex libre.tex libsets.tex libsha.tex libshelve.tex libshlex.tex libshutil.tex libsimplexmlrpc.tex libsite.tex libsmtplib.tex libsocket.tex libsocksvr.tex libstdtypes.tex libstdwin.tex libstringio.tex libsubprocess.tex libtarfile.tex libtempfile.tex libthread.tex libthreading.tex libtokenize.tex libunicodedata.tex liburllib.tex liburllib2.tex liburlparse.tex libuserdict.tex libuu.tex libweakref.tex libwebbrowser.tex libxmlrpclib.tex libzlib.tex tkinter.tex xmldom.tex Added Files: Tag: ast-branch libfunctional.tex libhashlib.tex libreconvert.tex libspwd.tex libzipimport.tex Log Message: Merge head to branch (for the last time) --- NEW FILE: libfunctional.tex --- \section{\module{functional} --- Higher order functions and operations on callable objects.} \declaremodule{standard}{functional} % standard library, in Python \moduleauthor{Peter Harris}{scav at blueyonder.co.uk} 
\moduleauthor{Raymond Hettinger}{python at rcn.com} \sectionauthor{Peter Harris}{scav at blueyonder.co.uk} \modulesynopsis{Higher-order functions and operations on callable objects.} \versionadded{2.5} The \module{functional} module is for higher-order functions: functions that act on or return other functions. In general, any callable object can be treated as a function for the purposes of this module. The \module{functional} module defines the following function: \begin{funcdesc}{partial}{func\optional{,*args}\optional{, **keywords}} Return a new \class{partial} object which when called will behave like \var{func} called with the positional arguments \var{args} and keyword arguments \var{keywords}. If more arguments are supplied to the call, they are appended to \var{args}. If additional keyword arguments are supplied, they extend and override \var{keywords}. Roughly equivalent to: \begin{verbatim} def partial(func, *args, **keywords): def newfunc(*fargs, **fkeywords): newkeywords = keywords.copy() newkeywords.update(fkeywords) return func(*(args + fargs), **newkeywords) newfunc.func = func newfunc.args = args newfunc.keywords = keywords return newfunc \end{verbatim} The \function{partial} is used for partial function application which ``freezes'' some portion of a function's arguments and/or keywords resulting in a new object with a simplified signature. For example, \function{partial} can be used to create a callable that behaves like the \function{int} function where the \var{base} argument defaults to two: \begin{verbatim} >>> basetwo = partial(int, base=2) >>> basetwo.__doc__ = 'Convert base 2 string to an int.' >>> basetwo('10010') 18 \end{verbatim} \end{funcdesc} \subsection{\class{partial} Objects \label{partial-objects}} \class{partial} objects are callable objects created by \function{partial()}. They have three read-only attributes: \begin{memberdesc}[callable]{func}{} A callable object or function. 
Calls to the \class{partial} object will be forwarded to \member{func} with new arguments and keywords. \end{memberdesc} \begin{memberdesc}[tuple]{args}{} The leftmost positional arguments that will be prepended to the positional arguments provided to a \class{partial} object call. \end{memberdesc} \begin{memberdesc}[dict]{keywords}{} The keyword arguments that will be supplied when the \class{partial} object is called. \end{memberdesc} \class{partial} objects are like \class{function} objects in that they are callable, weakly referenceable, and can have attributes. There are some important differences. For instance, the \member{__name__} and \member{__doc__} attributes are not created automatically. Also, \class{partial} objects defined in classes behave like static methods and do not transform into bound methods during instance attribute look-up. --- NEW FILE: libhashlib.tex --- \section{\module{hashlib} --- Secure hashes and message digests} \declaremodule{builtin}{hashlib} \modulesynopsis{Secure hash and message digest algorithms.} \moduleauthor{Gregory P. Smith}{greg at users.sourceforge.net} \sectionauthor{Gregory P. Smith}{greg at users.sourceforge.net} \versionadded{2.5} \index{message digest, MD5} \index{secure hash algorithm, SHA1, SHA224, SHA256, SHA384, SHA512} This module implements a common interface to many different secure hash and message digest algorithms. Included are the FIPS secure hash algorithms SHA1, SHA224, SHA256, SHA384, and SHA512 (defined in FIPS 180-2) as well as RSA's MD5 algorithm (defined in Internet \rfc{1321}). The terms secure hash and message digest are interchangeable. Older algorithms were called message digests. The modern term is secure hash. \warning{Some algorithms have known hash collision weaknesses, see the FAQ at the end.} There is one constructor method named for each type of \dfn{hash}. All return a hash object with the same simple interface. For example: use \function{sha1()} to create a SHA1 hash object. 
You can now feed this object with arbitrary strings using the \method{update()} method. At any point you can ask it for the \dfn{digest} of the concatenation of the strings fed to it so far using the \method{digest()} or \method{hexdigest()} methods. Constructors for hash algorithms that are always present in this module are \function{md5()}, \function{sha1()}, \function{sha224()}, \function{sha256()}, \function{sha384()}, and \function{sha512()}. Additional algorithms may also be available depending upon the OpenSSL library python uses on your platform. \index{OpenSSL} For example, to obtain the digest of the string \code{'Nobody inspects the spammish repetition'}: \begin{verbatim} >>> import hashlib >>> m = hashlib.md5() >>> m.update("Nobody inspects") >>> m.update(" the spammish repetition") >>> m.digest() '\xbbd\x9c\x83\xdd\x1e\xa5\xc9\xd9\xde\xc9\xa1\x8d\xf0\xff\xe9' \end{verbatim} More condensed: \begin{verbatim} >>> hashlib.sha224("Nobody inspects the spammish repetition").hexdigest() 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' \end{verbatim} A generic \function{new()} constructor that takes the string name of the desired algorithm as its first parameter also exists to allow access to the above listed hashes as well as any other algorithms that your OpenSSL library may offer. The named constructors are much faster than \function{new()} and should be preferred. Using \function{new()} with an algorithm provided by OpenSSL: \begin{verbatim} >>> h = hashlib.new('ripemd160') >>> h.update("Nobody inspects the spammish repetition") >>> h.hexdigest() 'cc4a5ce1b3df48aec5d22d1f16b894a0b894eccc' \end{verbatim} The following values are provided as constant attributes of the hash objects returned by the constructors: \begin{datadesc}{digest_size} The size of the resulting digest in bytes. \end{datadesc} A hash object has the following methods: \begin{methoddesc}[hash]{update}{arg} Update the hash object with the string \var{arg}. 
Repeated calls are equivalent to a single call with the concatenation of all the arguments: \code{m.update(a); m.update(b)} is equivalent to \code{m.update(a+b)}. \end{methoddesc} \begin{methoddesc}[hash]{digest}{} Return the digest of the strings passed to the \method{update()} method so far. This is a string of \member{digest_size} bytes which may contain non-\ASCII{} characters, including null bytes. \end{methoddesc} \begin{methoddesc}[hash]{hexdigest}{} Like \method{digest()} except the digest is returned as a string of double length, containing only hexadecimal digits. This may be used to exchange the value safely in email or other non-binary environments. \end{methoddesc} \begin{methoddesc}[hash]{copy}{} Return a copy (``clone'') of the hash object. This can be used to efficiently compute the digests of strings that share a common initial substring. \end{methoddesc} \begin{seealso} \seemodule{hmac}{A module to generate message authentication codes using hashes.} \seemodule{base64}{Another way to encode binary hashes for non-binary environments.} \seeurl{http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf} {The FIPS 180-2 publication on Secure Hash Algorithms.} \seeurl{http://www.cryptography.com/cnews/hash.html} {Hash Collision FAQ with information on which algorithms have known issues and what that means regarding their use.} \end{seealso} --- NEW FILE: libreconvert.tex --- \section{\module{reconvert} --- Convert regular expressions from regex to re form} \declaremodule{standard}{reconvert} \moduleauthor{Andrew M. Kuchling}{amk at amk.ca} \sectionauthor{Skip Montanaro}{skip at pobox.com} \modulesynopsis{Convert regex-, emacs- or sed-style regular expressions to re-style syntax.} This module provides a facility to convert regular expressions from the syntax used by the deprecated \module{regex} module to those used by the newer \module{re} module. 
Because of similarity between the regular expression syntax of \code{sed(1)} and \code{emacs(1)} and the \module{regex} module, it is also helpful to convert patterns written for those tools to \module{re} patterns. When used as a script, a Python string literal (or any other expression evaluating to a string) is read from stdin, and the translated expression is written to stdout as a string literal. Unless stdout is a tty, no trailing newline is written to stdout. This is done so that it can be used with Emacs \code{C-U M-|} (shell-command-on-region) which filters the region through the shell command. \begin{seealso} \seetitle{Mastering Regular Expressions}{Book on regular expressions by Jeffrey Friedl, published by O'Reilly. The second edition of the book no longer covers Python at all, but the first edition covered writing good regular expression patterns in great detail.} \end{seealso} \subsection{Module Contents} \nodename{Contents of Module reconvert} The module defines two functions and a handful of constants. \begin{funcdesc}{convert}{pattern\optional{, syntax=None}} Convert a \var{pattern} representing a \module{regex}-style regular expression into a \module{re}-style regular expression. The optional \var{syntax} parameter is a bitwise-or'd set of flags that control what constructs are converted. See below for a description of the various constants. \end{funcdesc} \begin{funcdesc}{quote}{s\optional{, quote=None}} Convert a string object to a quoted string literal. This is similar to \function{repr} but will return a "raw" string (r'...' or r"...") when the string contains backslashes, instead of doubling all backslashes. The resulting string does not always evaluate to the same string as the original; however it will do just the right thing when passed into re.compile(). The optional second argument forces the string quote; it must be a single character which is a valid Python string quote. 
Note that prior to Python 2.5 this would not accept triple-quoted string delimiters. \end{funcdesc} \begin{datadesc}{RE_NO_BK_PARENS} Suppress paren conversion. This should be omitted when converting \code{sed}-style or \code{emacs}-style regular expressions. \end{datadesc} \begin{datadesc}{RE_NO_BK_VBAR} Suppress vertical bar conversion. This should be omitted when converting \code{sed}-style or \code{emacs}-style regular expressions. \end{datadesc} \begin{datadesc}{RE_BK_PLUS_QM} Enable conversion of \code{+} and \code{?} characters. This should be added to the \var{syntax} arg of \function{convert} when converting \code{sed}-style regular expressions and omitted when converting \code{emacs}-style regular expressions. \end{datadesc} \begin{datadesc}{RE_NEWLINE_OR} When set, newline characters are replaced by \code{|}. \end{datadesc} --- NEW FILE: libspwd.tex --- \section{\module{spwd} --- The shadow password database} \declaremodule{builtin}{spwd} \platform{Unix} \modulesynopsis{The shadow password database (\function{getspnam()} and friends).} \versionadded{2.5} This module provides access to the \UNIX{} shadow password database. It is available on various \UNIX{} versions. You must have enough privileges to access the shadow password database (this usually means you have to be root). 
Shadow password database entries are reported as a tuple-like object, whose attributes correspond to the members of the \code{spwd} structure (Attribute field below, see \code{<shadow.h>}): \begin{tableiii}{r|l|l}{textrm}{Index}{Attribute}{Meaning} \lineiii{0}{\code{sp_nam}}{Login name} \lineiii{1}{\code{sp_pwd}}{Encrypted password} \lineiii{2}{\code{sp_lstchg}}{Date of last change} \lineiii{3}{\code{sp_min}}{Minimal number of days between changes} \lineiii{4}{\code{sp_max}}{Maximum number of days between changes} \lineiii{5}{\code{sp_warn}}{Number of days before password expires to warn user about it} \lineiii{6}{\code{sp_inact}}{Number of days after password expires until account is blocked} \lineiii{7}{\code{sp_expire}}{Number of days since 1970-01-01 until account is disabled} \lineiii{8}{\code{sp_flag}}{Reserved} \end{tableiii} The sp_nam and sp_pwd items are strings, all others are integers. \exception{KeyError} is raised if the entry asked for cannot be found. It defines the following items: \begin{funcdesc}{getspnam}{name} Return the shadow password database entry for the given user name. \end{funcdesc} \begin{funcdesc}{getspall}{} Return a list of all available shadow password database entries, in arbitrary order. \end{funcdesc} \begin{seealso} \seemodule{grp}{An interface to the group database, similar to this.} \seemodule{pwd}{An interface to the normal password database, similar to this.} \end{seealso} --- NEW FILE: libzipimport.tex --- \section{\module{zipimport} --- Import modules from Zip archives} \declaremodule{standard}{zipimport} \modulesynopsis{support for importing Python modules from ZIP archives.} \moduleauthor{Just van Rossum}{just at letterror.com} \versionadded{2.3} This module adds the ability to import Python modules (\file{*.py}, \file{*.py[co]}) and packages from ZIP-format archives. 
It is usually not needed to use the \module{zipimport} module explicitly; it is automatically used by the builtin \keyword{import} mechanism for \code{sys.path} items that are paths to ZIP archives. Typically, \code{sys.path} is a list of directory names as strings. This module also allows an item of \code{sys.path} to be a string naming a ZIP file archive. The ZIP archive can contain a subdirectory structure to support package imports, and a path within the archive can be specified to only import from a subdirectory. For example, the path \file{/tmp/example.zip/lib/} would only import from the \file{lib/} subdirectory within the archive. Any files may be present in the ZIP archive, but only files \file{.py} and \file{.py[co]} are available for import. ZIP import of dynamic modules (\file{.pyd}, \file{.so}) is disallowed. Note that if an archive only contains \file{.py} files, Python will not attempt to modify the archive by adding the corresponding \file{.pyc} or \file{.pyo} file, meaning that if a ZIP archive doesn't contain \file{.pyc} files, importing may be rather slow. Using the built-in \function{reload()} function will fail if called on a module loaded from a ZIP archive; it is unlikely that \function{reload()} would be needed, since this would imply that the ZIP has been altered during runtime. The available attributes of this module are: \begin{excdesc}{ZipImportError} Exception raised by zipimporter objects. It's a subclass of \exception{ImportError}, so it can be caught as \exception{ImportError}, too. \end{excdesc} \begin{classdesc*}{zipimporter} The class for importing ZIP files. See ``\citetitle{zipimporter Objects}'' (section \ref{zipimporter-objects}) for constructor details. \end{classdesc*} \begin{seealso} \seetitle[http://www.pkware.com/appnote.html]{PKZIP Application Note}{Documentation on the ZIP file format by Phil Katz, the creator of the format and algorithms used.} \seepep{0273}{Import Modules from Zip Archives}{Written by James C. 
Ahlstrom, who also provided an implementation. Python 2.3 follows the specification in PEP 273, but uses an implementation written by Just van Rossum that uses the import hooks described in PEP 302.} \seepep{0302}{New Import Hooks}{The PEP to add the import hooks that help this module work.} \end{seealso} \subsection{zipimporter Objects \label{zipimporter-objects}} \begin{classdesc}{zipimporter}{archivepath} Create a new zipimporter instance. \var{archivepath} must be a path to a zipfile. \class{ZipImportError} is raised if \var{archivepath} doesn't point to a valid ZIP archive. \end{classdesc} \begin{methoddesc}{find_module}{fullname\optional{, path}} Search for a module specified by \var{fullname}. \var{fullname} must be the fully qualified (dotted) module name. It returns the zipimporter instance itself if the module was found, or \constant{None} if it wasn't. The optional \var{path} argument is ignored---it's there for compatibility with the importer protocol. \end{methoddesc} \begin{methoddesc}{get_code}{fullname} Return the code object for the specified module. Raise \class{ZipImportError} if the module couldn't be found. \end{methoddesc} \begin{methoddesc}{get_data}{pathname} Return the data associated with \var{pathname}. Raise \exception{IOError} if the file wasn't found. \end{methoddesc} \begin{methoddesc}{get_source}{fullname} Return the source code for the specified module. Raise \class{ZipImportError} if the module couldn't be found, return \constant{None} if the archive does contain the module, but has no source for it. \end{methoddesc} \begin{methoddesc}{is_package}{fullname} Return True if the module specified by \var{fullname} is a package. Raise \class{ZipImportError} if the module couldn't be found. \end{methoddesc} \begin{methoddesc}{load_module}{fullname} Load the module specified by \var{fullname}. \var{fullname} must be the fully qualified (dotted) module name. 
It returns the imported module, or raises \class{ZipImportError} if it wasn't found. \end{methoddesc} \subsection{Examples} \nodename{zipimport Examples} Here is an example that imports a module from a ZIP archive - note that the \module{zipimport} module is not explicitly used. \begin{verbatim} $ unzip -l /tmp/example.zip Archive: /tmp/example.zip Length Date Time Name -------- ---- ---- ---- 8467 11-26-02 22:30 jwzthreading.py -------- ------- 8467 1 file $ ./python Python 2.3 (#1, Aug 1 2003, 19:54:32) >>> import sys >>> sys.path.insert(0, '/tmp/example.zip') # Add .zip file to front of path >>> import jwzthreading >>> jwzthreading.__file__ '/tmp/example.zip/jwzthreading.py' \end{verbatim} Index: asttable.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/asttable.tex,v retrieving revision 1.1.14.1 retrieving revision 1.1.14.2 diff -u -d -r1.1.14.1 -r1.1.14.2 --- asttable.tex 7 Jan 2005 06:57:04 -0000 1.1.14.1 +++ asttable.tex 16 Oct 2005 05:23:57 -0000 1.1.14.2 @@ -89,6 +89,8 @@ \lineiii{Ellipsis}{}{} \hline +\lineiii{Expression}{\member{node}}{} + \lineiii{Exec}{\member{expr}}{} \lineiii{}{\member{locals}}{} \lineiii{}{\member{globals}}{} Index: email.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/email.tex,v retrieving revision 1.11.2.2 retrieving revision 1.11.2.3 diff -u -d -r1.11.2.2 -r1.11.2.3 --- email.tex 7 Jan 2005 06:57:05 -0000 1.11.2.2 +++ email.tex 16 Oct 2005 05:23:57 -0000 1.11.2.3 @@ -244,7 +244,7 @@ \item The method \method{gettype()} was renamed to \method{get_type()}. -\item The method\method{getmaintype()} was renamed to +\item The method \method{getmaintype()} was renamed to \method{get_main_type()}. 
\item The method \method{getsubtype()} was renamed to Index: emailutil.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/emailutil.tex,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- emailutil.tex 7 Jan 2005 06:57:05 -0000 1.6.2.2 +++ emailutil.tex 16 Oct 2005 05:23:57 -0000 1.6.2.3 @@ -103,7 +103,8 @@ Optional \var{usegmt} is a flag that when \code{True}, outputs a date string with the timezone as an ascii string \code{GMT}, rather than a numeric \code{-0000}. This is needed for some protocols (such -as HTTP). This only applies when \var{localtime} is \code{False} +as HTTP). This only applies when \var{localtime} is \code{False}. +\versionadded{2.4} \end{funcdesc} \begin{funcdesc}{make_msgid}{\optional{idstring}} Index: lib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/lib.tex,v retrieving revision 1.202.2.2 retrieving revision 1.202.2.3 diff -u -d -r1.202.2.2 -r1.202.2.3 --- lib.tex 7 Jan 2005 06:57:05 -0000 1.202.2.2 +++ lib.tex 16 Oct 2005 05:23:57 -0000 1.202.2.3 @@ -91,7 +91,9 @@ \input{libmarshal} \input{libwarnings} \input{libimp} +\input{libzipimport} \input{libpkgutil} +\input{libmodulefinder} \input{libcode} \input{libcodeop} \input{libpprint} @@ -106,6 +108,7 @@ \input{libstrings} % String Services \input{libstring} \input{libre} +\input{libreconvert} \input{libstruct} \input{libdifflib} \input{libfpformat} @@ -130,6 +133,7 @@ \input{libarray} \input{libsets} \input{libitertools} +\input{libfunctional} \input{libcfgparser} \input{libfileinput} \input{libcalendar} @@ -191,6 +195,7 @@ \input{libunix} % UNIX Specific Services \input{libposix} \input{libpwd} +\input{libspwd} \input{libgrp} \input{libcrypt} \input{libdl} @@ -298,6 +303,7 @@ \input{libcrypto} % Cryptographic Services \input{libhmac} +\input{libhashlib} \input{libmd5} \input{libsha} Index: 
libasyncore.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libasyncore.tex,v retrieving revision 1.13.2.2 retrieving revision 1.13.2.3 diff -u -d -r1.13.2.2 -r1.13.2.3 --- libasyncore.tex 7 Jan 2005 06:57:05 -0000 1.13.2.2 +++ libasyncore.tex 16 Oct 2005 05:23:57 -0000 1.13.2.3 @@ -53,12 +53,11 @@ \function{poll()} call, measured in seconds; the default is 30 seconds. The \var{use_poll} parameter, if true, indicates that \function{poll()} should be used in preference to \function{select()} (the default is - \code{False}). The \var{map} parameter is a dictionary whose items are - the channels to watch. As channels are closed they are deleted from their - map. If \var{map} is omitted, a global map is used (this map is updated - by the default class \method{__init__()} -- make sure you extend, rather - than override, \method{__init__()} if you want to retain this behavior). + \code{False}). + The \var{map} parameter is a dictionary whose items are + the channels to watch. As channels are closed they are deleted from their + map. If \var{map} is omitted, a global map is used. Channels (instances of \class{asyncore.dispatcher}, \class{asynchat.async_chat} and subclasses thereof) can freely be mixed in the map. \end{funcdesc} Index: libaudioop.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libaudioop.tex,v retrieving revision 1.20 retrieving revision 1.20.8.1 diff -u -d -r1.20 -r1.20.8.1 --- libaudioop.tex 6 Dec 2001 23:16:09 -0000 1.20 +++ libaudioop.tex 16 Oct 2005 05:23:57 -0000 1.20.8.1 @@ -42,11 +42,6 @@ has the width specified in \var{width}. \end{funcdesc} -\begin{funcdesc}{adpcm32lin}{adpcmfragment, width, state} -Decode an alternative 3-bit ADPCM code. See \function{lin2adpcm3()} -for details. -\end{funcdesc} - \begin{funcdesc}{avg}{fragment, width} Return the average over all samples in the fragment. 
\end{funcdesc} @@ -122,13 +117,6 @@ packed 2 4-bit values per byte. \end{funcdesc} -\begin{funcdesc}{lin2adpcm3}{fragment, width, state} -This is an alternative ADPCM coder that uses only 3 bits per sample. -It is not compatible with the Intel/DVI ADPCM coder and its output is -not packed (due to laziness on the side of the author). Its use is -discouraged. -\end{funcdesc} - \begin{funcdesc}{lin2ulaw}{fragment, width} Convert samples in the audio fragment to u-LAW encoding and return this as a Python string. u-LAW is an audio encoding format whereby Index: libbinascii.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libbinascii.tex,v retrieving revision 1.23 retrieving revision 1.23.8.1 diff -u -d -r1.23 -r1.23.8.1 --- libbinascii.tex 28 Nov 2001 07:26:15 -0000 1.23 +++ libbinascii.tex 16 Oct 2005 05:23:57 -0000 1.23.8.1 @@ -51,10 +51,13 @@ Convert binary data to a line(s) of \ASCII{} characters in quoted-printable encoding. The return value is the converted line(s). If the optional argument \var{quotetabs} is present and true, all tabs -and spaces will be encoded. If the optional argument \var{header} is +and spaces will be encoded. +If the optional argument \var{istext} is present and true, +newlines are not encoded but trailing whitespace will be encoded. +If the optional argument \var{header} is present and true, spaces will be encoded as underscores per RFC1522. If the optional argument \var{header} is present and false, newline -characters will be encoded as well, otherwise linefeed conversion might +characters will be encoded as well; otherwise linefeed conversion might corrupt the binary data stream. 
\end{funcdesc} Index: libbsddb.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libbsddb.tex,v retrieving revision 1.7.2.2 retrieving revision 1.7.2.3 diff -u -d -r1.7.2.2 -r1.7.2.3 --- libbsddb.tex 7 Jan 2005 06:57:05 -0000 1.7.2.2 +++ libbsddb.tex 16 Oct 2005 05:23:57 -0000 1.7.2.3 @@ -56,7 +56,7 @@ \begin{funcdesc}{btopen}{filename\optional{, flag\optional{, mode\optional{, btflags\optional{, cachesize\optional{, maxkeypage\optional{, -minkeypage\optional{, psize\optional{, lorder}}}}}}}}} +minkeypage\optional{, pgsize\optional{, lorder}}}}}}}}} Open the btree format file named \var{filename}. Files never intended to be preserved on disk may be created by passing \code{None} as the @@ -71,7 +71,7 @@ \end{funcdesc} \begin{funcdesc}{rnopen}{filename\optional{, flag\optional{, mode\optional{, -rnflags\optional{, cachesize\optional{, psize\optional{, lorder\optional{, +rnflags\optional{, cachesize\optional{, pgsize\optional{, lorder\optional{, reclen\optional{, bval\optional{, bfname}}}}}}}}}} Open a DB record format file named \var{filename}. Files never intended Index: libcgi.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcgi.tex,v retrieving revision 1.36.2.2 retrieving revision 1.36.2.3 diff -u -d -r1.36.2.2 -r1.36.2.3 --- libcgi.tex 7 Jan 2005 06:57:05 -0000 1.36.2.2 +++ libcgi.tex 16 Oct 2005 05:23:57 -0000 1.36.2.3 @@ -404,7 +404,7 @@ \character{\&}, \character{<} and \character{>} in string \var{s} to HTML-safe sequences. Use this if you need to display text that might contain such characters in HTML. If the optional flag \var{quote} is -true, the double-quote character (\character{"}) is also translated; +true, the quotation mark character (\character{"}) is also translated; this helps for inclusion in an HTML attribute value, as in \code{}. 
If the value to be quoted might include single- or double-quote characters, or both, consider using the Index: libcodecs.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcodecs.tex,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- libcodecs.tex 7 Jan 2005 06:57:05 -0000 1.10.2.2 +++ libcodecs.tex 16 Oct 2005 05:23:57 -0000 1.10.2.3 @@ -394,7 +394,7 @@ be extended with \function{register_error()}. \end{classdesc} -\begin{methoddesc}{read}{\optional{size\optional{, chars}}} +\begin{methoddesc}{read}{\optional{size\optional{, chars, \optional{firstline}}}} Decodes data from the stream and returns the resulting object. \var{chars} indicates the number of characters to read from the @@ -408,12 +408,16 @@ decode as much as possible. \var{size} is intended to prevent having to decode huge files in one step. + \var{firstline} indicates that it would be sufficient to only return + the first line, if there are decoding errors on later lines. + The method should use a greedy read strategy meaning that it should read as much data as is allowed within the definition of the encoding and the given size, e.g. if optional encoding endings or state markers are available on the stream, these should be read too. 
\versionchanged[\var{chars} argument added]{2.4} + \versionchanged[\var{firstline} argument added]{2.4.2} \end{methoddesc} \begin{methoddesc}{readline}{\optional{size\optional{, keepends}}} @@ -879,7 +883,7 @@ {all languages (BMP only)} \lineiii{utf_7} - {U7} + {U7, unicode-1-1-utf-7} {all languages} \lineiii{utf_8} Index: libcollections.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcollections.tex,v retrieving revision 1.10.6.1 retrieving revision 1.10.6.2 diff -u -d -r1.10.6.1 -r1.10.6.2 --- libcollections.tex 7 Jan 2005 06:57:05 -0000 1.10.6.1 +++ libcollections.tex 16 Oct 2005 05:23:57 -0000 1.10.6.2 @@ -64,6 +64,12 @@ If no elements are present, raises a \exception{IndexError}. \end{methoddesc} +\begin{methoddesc}{remove}{value} + Removed the first occurrence of \var{value}. If not found, + raises a \exception{ValueError}. + \versionadded{2.5} +\end{methoddesc} + \begin{methoddesc}{rotate}{n} Rotate the deque \var{n} steps to the right. If \var{n} is negative, rotate to the left. Rotating one step to the right Index: libcookie.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcookie.tex,v retrieving revision 1.7.8.2 retrieving revision 1.7.8.3 diff -u -d -r1.7.8.2 -r1.7.8.3 --- libcookie.tex 7 Jan 2005 06:57:05 -0000 1.7.8.2 +++ libcookie.tex 16 Oct 2005 05:23:57 -0000 1.7.8.3 @@ -98,7 +98,9 @@ Return a string representation suitable to be sent as HTTP headers. \var{attrs} and \var{header} are sent to each \class{Morsel}'s \method{output()} method. \var{sep} is used to join the headers -together, and is by default a newline. +together, and is by default the combination \code{'\e r\e n'} (CRLF). 
+\versionchanged[The default separator has been changed from \code{'\e n'} +to match the cookie specification]{2.5} \end{methoddesc} \begin{methoddesc}[BaseCookie]{js_output}{\optional{attrs}} @@ -195,32 +197,32 @@ >>> C["fig"] = "newton" >>> C["sugar"] = "wafer" >>> print C # generate HTTP headers -Set-Cookie: sugar=wafer; -Set-Cookie: fig=newton; +Set-Cookie: sugar=wafer +Set-Cookie: fig=newton >>> print C.output() # same thing -Set-Cookie: sugar=wafer; -Set-Cookie: fig=newton; +Set-Cookie: sugar=wafer +Set-Cookie: fig=newton >>> C = Cookie.SmartCookie() >>> C["rocky"] = "road" >>> C["rocky"]["path"] = "/cookie" >>> print C.output(header="Cookie:") -Cookie: rocky=road; Path=/cookie; +Cookie: rocky=road; Path=/cookie >>> print C.output(attrs=[], header="Cookie:") -Cookie: rocky=road; +Cookie: rocky=road >>> C = Cookie.SmartCookie() >>> C.load("chips=ahoy; vienna=finger") # load from a string (HTTP header) >>> print C -Set-Cookie: vienna=finger; -Set-Cookie: chips=ahoy; +Set-Cookie: vienna=finger +Set-Cookie: chips=ahoy >>> C = Cookie.SmartCookie() >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') >>> print C -Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"; +Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" >>> C = Cookie.SmartCookie() >>> C["oreo"] = "doublestuff" >>> C["oreo"]["path"] = "/" >>> print C -Set-Cookie: oreo=doublestuff; Path=/; +Set-Cookie: oreo=doublestuff; Path=/ >>> C = Cookie.SmartCookie() >>> C["twix"] = "none for you" >>> C["twix"].value @@ -233,8 +235,8 @@ >>> C["string"].value 'seven' >>> print C -Set-Cookie: number=7; -Set-Cookie: string=seven; +Set-Cookie: number=7 +Set-Cookie: string=seven >>> C = Cookie.SerialCookie() >>> C["number"] = 7 >>> C["string"] = "seven" @@ -243,8 +245,8 @@ >>> C["string"].value 'seven' >>> print C -Set-Cookie: number="I7\012."; -Set-Cookie: string="S'seven'\012p1\012."; +Set-Cookie: number="I7\012." +Set-Cookie: string="S'seven'\012p1\012." 
>>> C = Cookie.SmartCookie() >>> C["number"] = 7 >>> C["string"] = "seven" @@ -253,6 +255,6 @@ >>> C["string"].value 'seven' >>> print C -Set-Cookie: number="I7\012."; -Set-Cookie: string=seven; +Set-Cookie: number="I7\012." +Set-Cookie: string=seven \end{verbatim} Index: libcsv.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcsv.tex,v retrieving revision 1.4.4.2 retrieving revision 1.4.4.3 diff -u -d -r1.4.4.2 -r1.4.4.3 --- libcsv.tex 7 Jan 2005 06:57:05 -0000 1.4.4.2 +++ libcsv.tex 16 Oct 2005 05:23:57 -0000 1.4.4.3 @@ -50,11 +50,12 @@ The \module{csv} module defines the following functions: \begin{funcdesc}{reader}{csvfile\optional{, - dialect=\code{'excel'}\optional{, fmtparam}}} + dialect=\code{'excel'}}\optional{, fmtparam}} Return a reader object which will iterate over lines in the given {}\var{csvfile}. \var{csvfile} can be any object which supports the iterator protocol and returns a string each time its \method{next} -method is called. If \var{csvfile} is a file object, it must be opened with +method is called - file objects and list objects are both suitable. +If \var{csvfile} is a file object, it must be opened with the 'b' flag on platforms where that makes a difference. An optional {}\var{dialect} parameter can be given which is used to define a set of parameters specific to a particular CSV @@ -71,7 +72,7 @@ \end{funcdesc} \begin{funcdesc}{writer}{csvfile\optional{, - dialect=\code{'excel'}\optional{, fmtparam}}} + dialect=\code{'excel'}}\optional{, fmtparam}} Return a writer object responsible for converting the user's data into delimited strings on the given file-like object. \var{csvfile} can be any object with a \function{write} method. If \var{csvfile} is a file object, @@ -94,9 +95,14 @@ with \function{str()} before being written. \end{funcdesc} -\begin{funcdesc}{register_dialect}{name, dialect} -Associate \var{dialect} with \var{name}. 
\var{dialect} must be a subclass -of \class{csv.Dialect}. \var{name} must be a string or Unicode object. +\begin{funcdesc}{register_dialect}{name\optional{, dialect}\optional{, fmtparam}} +Associate \var{dialect} with \var{name}. \var{name} must be a string +or Unicode object. The dialect can be specified either by passing a +sub-class of \class{Dialect}, or by \var{fmtparam} keyword arguments, +or both, with keyword arguments overriding parameters of the dialect. +For more information about the dialect and formatting parameters, see +section~\ref{csv-fmt-params}, ``Dialects and Formatting Parameters'' +for details of these parameters. \end{funcdesc} \begin{funcdesc}{unregister_dialect}{name} @@ -114,6 +120,12 @@ Return the names of all registered dialects. \end{funcdesc} +\begin{funcdesc}{field_size_limit}{\optional{new_limit}} + Returns the current maximum field size allowed by the parser. If + \var{new_limit} is given, this becomes the new limit. + \versionadded{2.5} +\end{funcdesc} + The \module{csv} module defines the following classes: @@ -208,19 +220,25 @@ \begin{datadesc}{QUOTE_MINIMAL} Instructs \class{writer} objects to only quote those fields which contain -the current \var{delimiter} or begin with the current \var{quotechar}. +special characters such as \var{delimiter}, \var{quotechar} or any of the +characters in \var{lineterminator}. \end{datadesc} \begin{datadesc}{QUOTE_NONNUMERIC} -Instructs \class{writer} objects to quote all non-numeric fields. +Instructs \class{writer} objects to quote all non-numeric +fields. + +Instructs the reader to convert all non-quoted fields to type \var{float}. \end{datadesc} \begin{datadesc}{QUOTE_NONE} Instructs \class{writer} objects to never quote fields. When the current \var{delimiter} occurs in output data it is preceded by the current -\var{escapechar} character. 
When \constant{QUOTE_NONE} is in effect, it -is an error not to have a single-character \var{escapechar} defined, even if -no data to be written contains the \var{delimiter} character. +\var{escapechar} character. If \var{escapechar} is not set, the writer +will raise \exception{Error} if any characters that require escaping +are encountered. + +Instructs \class{reader} to perform no special processing of quote characters. \end{datadesc} @@ -250,32 +268,43 @@ \end{memberdesc} \begin{memberdesc}[Dialect]{doublequote} -Controls how instances of \var{quotechar} appearing inside a field should be -themselves be quoted. When \constant{True}, the character is doubled. -When \constant{False}, the \var{escapechar} must be a one-character string -which is used as a prefix to the \var{quotechar}. It defaults to -\constant{True}. +Controls how instances of \var{quotechar} appearing inside a field should +be themselves be quoted. When \constant{True}, the character is doubled. +When \constant{False}, the \var{escapechar} is used as a prefix to the +\var{quotechar}. It defaults to \constant{True}. + +On output, if \var{doublequote} is \constant{False} and no +\var{escapechar} is set, \exception{Error} is raised if a \var{quotechar} +is found in a field. \end{memberdesc} \begin{memberdesc}[Dialect]{escapechar} -A one-character string used to escape the \var{delimiter} if \var{quoting} -is set to \constant{QUOTE_NONE}. It defaults to \constant{None}. +A one-character string used by the writer to escape the \var{delimiter} if +\var{quoting} is set to \constant{QUOTE_NONE} and the \var{quotechar} +if \var{doublequote} is \constant{False}. On reading, the \var{escapechar} +removes any special meaning from the following character. It defaults +to \constant{None}, which disables escaping. \end{memberdesc} \begin{memberdesc}[Dialect]{lineterminator} -The string used to terminate lines in the CSV file. It defaults to -\code{'\e r\e n'}. 
+The string used to terminate lines produced by the \class{writer}. +It defaults to \code{'\e r\e n'}. + +\note{The \class{reader} is hard-coded to recognise either \code{'\e r'} +or \code{'\e n'} as end-of-line, and ignores \var{lineterminator}. This +behavior may change in the future.} \end{memberdesc} \begin{memberdesc}[Dialect]{quotechar} -A one-character string used to quote elements containing the \var{delimiter} -or which start with the \var{quotechar}. It defaults to \code{'"'}. +A one-character string used to quote fields containing special characters, +such as the \var{delimiter} or \var{quotechar}, or which contain new-line +characters. It defaults to \code{'"'}. \end{memberdesc} \begin{memberdesc}[Dialect]{quoting} -Controls when quotes should be generated by the writer. It can take on any -of the \constant{QUOTE_*} constants (see section~\ref{csv-contents}) -and defaults to \constant{QUOTE_MINIMAL}. +Controls when quotes should be generated by the writer and recognised +by the reader. It can take on any of the \constant{QUOTE_*} constants +(see section~\ref{csv-contents}) and defaults to \constant{QUOTE_MINIMAL}. \end{memberdesc} \begin{memberdesc}[Dialect]{skipinitialspace} @@ -294,6 +323,17 @@ according to the current dialect. \end{methoddesc} +Reader objects have the following public attributes: + +\begin{memberdesc}[csv reader]{dialect} +A read-only description of the dialect in use by the parser. +\end{memberdesc} + +\begin{memberdesc}[csv reader]{line_num} + The number of lines read from the source iterator. This is not the same + as the number of records returned, as records can span multiple lines. +\end{memberdesc} + \subsection{Writer Objects} @@ -317,10 +357,17 @@ according to the current dialect. \end{methoddesc} +Writer objects have the following public attribute: + +\begin{memberdesc}[csv writer]{dialect} +A read-only description of the dialect in use by the writer. 
+\end{memberdesc} + + \subsection{Examples} -The ``Hello, world'' of csv reading is +The simplest example of reading a CSV file: \begin{verbatim} import csv @@ -329,20 +376,86 @@ print row \end{verbatim} -To print just the first and last columns of each row try +Reading a file with an alternate format: \begin{verbatim} import csv -reader = csv.reader(open("some.csv", "rb")) +reader = csv.reader(open("passwd", "rb"), delimiter=':', quoting=csv.QUOTE_NONE) for row in reader: - print row[0], row[-1] + print row \end{verbatim} -The corresponding simplest possible writing example is +The corresponding simplest possible writing example is: \begin{verbatim} import csv writer = csv.writer(open("some.csv", "wb")) -for row in someiterable: - writer.writerow(row) +writer.writerows(someiterable) +\end{verbatim} + +Registering a new dialect: + +\begin{verbatim} +import csv + +csv.register_dialect('unixpwd', delimiter=':', quoting=csv.QUOTE_NONE) + +reader = csv.reader(open("passwd", "rb"), 'unixpwd') +\end{verbatim} + +A slightly more advanced use of the reader - catching and reporting errors: + +\begin{verbatim} +import csv, sys +filename = "some.csv" +reader = csv.reader(open(filename, "rb")) +try: + for row in reader: + print row +except csv.Error, e: + sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e)) \end{verbatim} + +And while the module doesn't directly support parsing strings, it can +easily be done: + +\begin{verbatim} +import csv +print csv.reader(['one,two,three'])[0] +\end{verbatim} + +The \module{csv} module doesn't directly support reading and writing +Unicode, but it is 8-bit clean save for some problems with \ASCII{} NUL +characters, so you can write classes that handle the encoding and decoding +for you as long as you avoid encodings like utf-16 that use NULs. 
+ +\begin{verbatim} +import csv + +class UnicodeReader: + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + self.reader = csv.reader(f, dialect=dialect, **kwds) + self.encoding = encoding + + def next(self): + row = self.reader.next() + return [unicode(s, self.encoding) for s in row] + + def __iter__(self): + return self + +class UnicodeWriter: + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + self.writer = csv.writer(f, dialect=dialect, **kwds) + self.encoding = encoding + + def writerow(self, row): + self.writer.writerow([s.encode("utf-8") for s in row]) + + def writerows(self, rows): + for row in rows: + self.writerow(row) +\end{verbatim} + +They should work just like the \class{csv.reader} and \class{csv.writer} +classes but add an \var{encoding} parameter. Index: libcurses.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcurses.tex,v retrieving revision 1.39.2.2 retrieving revision 1.39.2.3 diff -u -d -r1.39.2.2 -r1.39.2.3 --- libcurses.tex 7 Jan 2005 06:57:06 -0000 1.39.2.2 +++ libcurses.tex 16 Oct 2005 05:23:57 -0000 1.39.2.3 @@ -161,7 +161,7 @@ calls, LINES is set to 1; the capabilities clear, cup, cud, cud1, cuu1, cuu, vpa are disabled; and the home string is set to the value of cr. The effect is that the cursor is confined to the current line, and so -are screen updates. This may be used for enabling cgaracter-at-a-time +are screen updates. This may be used for enabling character-at-a-time line editing without touching the rest of the screen. \end{funcdesc} @@ -391,7 +391,7 @@ \begin{funcdesc}{pair_content}{pair_number} Returns a tuple \code{(\var{fg}, \var{bg})} containing the colors for the requested color pair. The value of \var{pair_number} must be -between \code{0} and \code{\constant{COLOR_PAIRS} - 1}. +between \code{1} and \code{\constant{COLOR_PAIRS} - 1}. 
\end{funcdesc} \begin{funcdesc}{pair_number}{attr} @@ -635,8 +635,8 @@ \lineiii{bs}{Bottom}{\constant{ACS_HLINE}} \lineiii{tl}{Upper-left corner}{\constant{ACS_ULCORNER}} \lineiii{tr}{Upper-right corner}{\constant{ACS_URCORNER}} - \lineiii{bl}{Bottom-left corner}{\constant{ACS_BLCORNER}} - \lineiii{br}{Bottom-right corner}{\constant{ACS_BRCORNER}} + \lineiii{bl}{Bottom-left corner}{\constant{ACS_LLCORNER}} + \lineiii{br}{Bottom-right corner}{\constant{ACS_LRCORNER}} \end{tableiii} \end{methoddesc} Index: libdatetime.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libdatetime.tex,v retrieving revision 1.44.6.2 retrieving revision 1.44.6.3 diff -u -d -r1.44.6.2 -r1.44.6.3 --- libdatetime.tex 7 Jan 2005 06:57:06 -0000 1.44.6.2 +++ libdatetime.tex 16 Oct 2005 05:23:57 -0000 1.44.6.3 @@ -23,7 +23,7 @@ time adjustment. Whether a naive \class{datetime} object represents Coordinated Universal Time (UTC), local time, or time in some other timezone is purely up to the program, just like it's up to the program -whether a particular number represents meters, miles, or mass. Naive +whether a particular number represents metres, miles, or mass. Naive \class{datetime} objects are easy to understand and to work with, at the cost of ignoring some aspects of reality. @@ -624,6 +624,17 @@ ignored. \end{methoddesc} +\begin{methoddesc}{strptime}{date_string, format} + Return a \class{datetime} corresponding to \var{date_string}, parsed + according to \var{format}. This is equivalent to + \code{datetime(*(time.strptime(date_string, + format)[0:6]))}. \exception{ValueError} is raised if the date_string and + format can't be parsed by \function{time.strptime()} or if it returns a + value which isn't a time tuple. 
+ + \versionadded{2.5} +\end{methoddesc} + Class attributes: \begin{memberdesc}{min} Index: libdecimal.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libdecimal.tex,v retrieving revision 1.24.4.1 retrieving revision 1.24.4.2 diff -u -d -r1.24.4.1 -r1.24.4.2 --- libdecimal.tex 7 Jan 2005 06:57:06 -0000 1.24.4.1 +++ libdecimal.tex 16 Oct 2005 05:23:57 -0000 1.24.4.2 @@ -32,7 +32,7 @@ For this reason, decimal would be preferred in accounting applications which have strict equality invariants. -\item The decimal module incorporates notion of significant places so that +\item The decimal module incorporates a notion of significant places so that \samp{1.30 + 1.20} is \constant{2.50}. The trailing zero is kept to indicate significance. This is the customary presentation for monetary applications. For multiplication, the ``schoolbook'' approach uses all the figures in the @@ -84,7 +84,7 @@ \constant{Subnormal}, \constant{Overflow}, and \constant{Underflow}. For each signal there is a flag and a trap enabler. When a signal is -encountered, its flag incremented from zero and, then, if the trap enabler +encountered, its flag is incremented from zero and, then, if the trap enabler is set to one, an exception is raised. Flags are sticky, so the user needs to reset them before monitoring a calculation. @@ -119,7 +119,7 @@ \end{verbatim} -Decimal instances can be constructed from integers, strings or tuples. To +Decimal instances can be constructed from integers, strings, or tuples. To create a Decimal from a \class{float}, first convert it to a string. This serves as an explicit reminder of the details of the conversion (including representation error). Decimal numbers include special values such as @@ -160,7 +160,7 @@ \end{verbatim} -Decimals interact well with much of the rest of python. Here is a small +Decimals interact well with much of the rest of Python. 
Here is a small decimal floating point flying circus: \begin{verbatim} @@ -501,13 +501,15 @@ arithmetic operations in the context. The \var{rounding} option is one of: - \constant{ROUND_CEILING} (towards \constant{Infinity}), - \constant{ROUND_DOWN} (towards zero), - \constant{ROUND_FLOOR} (towards \constant{-Infinity}), - \constant{ROUND_HALF_DOWN} (towards zero), - \constant{ROUND_HALF_EVEN}, - \constant{ROUND_HALF_UP} (away from zero), or - \constant{ROUND_UP} (away from zero). + \begin{itemize} + \item \constant{ROUND_CEILING} (towards \constant{Infinity}), + \item \constant{ROUND_DOWN} (towards zero), + \item \constant{ROUND_FLOOR} (towards \constant{-Infinity}), + \item \constant{ROUND_HALF_DOWN} (to nearest with ties going towards zero), + \item \constant{ROUND_HALF_EVEN} (to nearest with ties going to nearest even integer), + \item \constant{ROUND_HALF_UP} (to nearest with ties going away from zero), or + \item \constant{ROUND_UP} (away from zero). + \end{itemize} The \var{traps} and \var{flags} fields list any signals to be set. Generally, new contexts should only set traps and leave the flags clear. @@ -525,11 +527,11 @@ large number of methods for doing arithmetic directly in a given context. \begin{methoddesc}{clear_flags}{} - Sets all of the flags to \constant{0}. + Resets all of the flags to \constant{0}. \end{methoddesc} \begin{methoddesc}{copy}{} - Returns a duplicate of the context. + Return a duplicate of the context. \end{methoddesc} \begin{methoddesc}{create_decimal}{num} @@ -691,7 +693,7 @@ Return the square root to full precision. \end{methoddesc} -\begin{methoddesc}{substract}{x, y} +\begin{methoddesc}{subtract}{x, y} Return the difference between \var{x} and \var{y}. \end{methoddesc} @@ -741,7 +743,7 @@ \end{classdesc*} \begin{classdesc*}{DecimalException} - Base class for other signals and is a subclass of + Base class for other signals and a subclass of \exception{ArithmeticError}. 
\end{classdesc*} @@ -834,6 +836,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Floating Point Notes \label{decimal-notes}} +\subsubsection{Mitigating round-off error with increased precision} + The use of decimal floating point eliminates decimal representation error (making it possible to represent \constant{0.1} exactly); however, some operations can still incur round-off error when non-zero digits exceed the @@ -847,7 +851,7 @@ \begin{verbatim} # Examples from Seminumerical Algorithms, Section 4.2.2. ->>> from decimal import * +>>> from decimal import Decimal, getcontext >>> getcontext().prec = 8 >>> u, v, w = Decimal(11111113), Decimal(-11111111), Decimal('7.51111111') @@ -881,6 +885,7 @@ Decimal("0.0060000") \end{verbatim} +\subsubsection{Special values} The number system for the \module{decimal} module provides special values including \constant{NaN}, \constant{sNaN}, \constant{-Infinity}, @@ -1118,3 +1123,166 @@ return +s \end{verbatim} + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Decimal FAQ \label{decimal-faq}} + +Q. It is cumbersome to type \code{decimal.Decimal('1234.5')}. Is there a way +to minimize typing when using the interactive interpreter? + +A. Some users abbreviate the constructor to just a single letter: + +\begin{verbatim} +>>> D = decimal.Decimal +>>> D('1.23') + D('3.45') +Decimal("4.68") +\end{verbatim} + + +Q. In a fixed-point application with two decimal places, some inputs +have many places and need to be rounded. Others are not supposed to have +excess digits and need to be validated. What methods should be used? + +A. The \method{quantize()} method rounds to a fixed number of decimal places. 
+If the \constant{Inexact} trap is set, it is also useful for validation: + +\begin{verbatim} +>>> TWOPLACES = Decimal(10) ** -2 # same as Decimal('0.01') + +>>> # Round to two places +>>> Decimal("3.214").quantize(TWOPLACES) +Decimal("3.21") + +>>> # Validate that a number does not exceed two places +>>> Decimal("3.21").quantize(TWOPLACES, context=Context(traps=[Inexact])) +Decimal("3.21") + +>>> Decimal("3.214").quantize(TWOPLACES, context=Context(traps=[Inexact])) +Traceback (most recent call last): + ... +Inexact: Changed in rounding +\end{verbatim} + + +Q. Once I have valid two place inputs, how do I maintain that invariant +throughout an application? + +A. Some operations like addition and subtraction automatically preserve fixed +point. Others, like multiplication and division, change the number of decimal +places and need to be followed up with a \method{quantize()} step. + + +Q. There are many ways to express the same value. The numbers +\constant{200}, \constant{200.000}, \constant{2E2}, and \constant{.02E+4} all +have the same value at various precisions. Is there a way to transform them to +a single recognizable canonical value? + +A. The \method{normalize()} method maps all equivalent values to a single +representative: + +\begin{verbatim} +>>> values = map(Decimal, '200 200.000 2E2 .02E+4'.split()) +>>> [v.normalize() for v in values] +[Decimal("2E+2"), Decimal("2E+2"), Decimal("2E+2"), Decimal("2E+2")] +\end{verbatim} + + +Q. Some decimal values always print with exponential notation. Is there +a way to get a non-exponential representation? + +A. For some values, exponential notation is the only way to express +the number of significant places in the coefficient. For example, +expressing \constant{5.0E+3} as \constant{5000} keeps the value +constant but cannot show the original's two-place significance. + + +Q. Is there a way to convert a regular float to a \class{Decimal}? + +A.
Yes, all binary floating point numbers can be exactly expressed as a +Decimal. An exact conversion may take more precision than intuition would +suggest, so trapping \constant{Inexact} will signal a need for more precision: + +\begin{verbatim} +def floatToDecimal(f): + "Convert a floating point number to a Decimal with no loss of information" + # Transform (exactly) a float to a mantissa (0.5 <= abs(m) < 1.0) and an + # exponent. Double the mantissa until it is an integer. Use the integer + # mantissa and exponent to compute an equivalent Decimal. If this cannot + # be done exactly, then retry with more precision. + + mantissa, exponent = math.frexp(f) + while mantissa != int(mantissa): + mantissa *= 2.0 + exponent -= 1 + mantissa = int(mantissa) + + oldcontext = getcontext() + setcontext(Context(traps=[Inexact])) + try: + while True: + try: + return mantissa * Decimal(2) ** exponent + except Inexact: + getcontext().prec += 1 + finally: + setcontext(oldcontext) +\end{verbatim} + + +Q. Why isn't the \function{floatToDecimal()} routine included in the module? + +A. There is some question about whether it is advisable to mix binary and +decimal floating point. Also, its use requires some care to avoid the +representation issues associated with binary floating point: + +\begin{verbatim} +>>> floatToDecimal(1.1) +Decimal("1.100000000000000088817841970012523233890533447265625") +\end{verbatim} + + +Q. Within a complex calculation, how can I make sure that I haven't gotten a +spurious result because of insufficient precision or rounding anomalies? + +A. The decimal module makes it easy to test results. A best practice is to +re-run calculations using greater precision and with various rounding modes. +Widely differing results indicate insufficient precision, rounding mode +issues, ill-conditioned inputs, or a numerically unstable algorithm. + + +Q. I noticed that context precision is applied to the results of operations +but not to the inputs.
Is there anything to watch out for when mixing +values of different precisions? + +A. Yes. The principle is that all values are considered to be exact and so +is the arithmetic on those values. Only the results are rounded. The +advantage for inputs is that ``what you type is what you get''. A +disadvantage is that the results can look odd if you forget that the inputs +haven't been rounded: + +\begin{verbatim} +>>> getcontext().prec = 3 +>>> Decimal('3.104') + D('2.104') +Decimal("5.21") +>>> Decimal('3.104') + D('0.000') + D('2.104') +Decimal("5.20") +\end{verbatim} + +The solution is either to increase precision or to force rounding of inputs +using the unary plus operation: + +\begin{verbatim} +>>> getcontext().prec = 3 +>>> +Decimal('1.23456789') # unary plus triggers rounding +Decimal("1.23") +\end{verbatim} + +Alternatively, inputs can be rounded upon creation using the +\method{Context.create_decimal()} method: + +\begin{verbatim} +>>> Context(prec=5, rounding=ROUND_DOWN).create_decimal('1.2345678') +Decimal("1.2345") +\end{verbatim} Index: libdis.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libdis.tex,v retrieving revision 1.37.2.2 retrieving revision 1.37.2.3 diff -u -d -r1.37.2.2 -r1.37.2.3 --- libdis.tex 7 Jan 2005 06:57:22 -0000 1.37.2.2 +++ libdis.tex 16 Oct 2005 05:23:57 -0000 1.37.2.3 @@ -26,9 +26,7 @@ 2 0 LOAD_GLOBAL 0 (len) 3 LOAD_FAST 0 (alist) 6 CALL_FUNCTION 1 - 9 RETURN_VALUE - 10 LOAD_CONST 0 (None) - 13 RETURN_VALUE + 9 RETURN_VALUE \end{verbatim} (The ``2'' is a line number). @@ -126,6 +124,10 @@ Indicates end-of-code to the compiler, not used by the interpreter. \end{opcodedesc} +\begin{opcodedesc}{NOP}{} +Do nothing code. Used as a placeholder by the bytecode optimizer. +\end{opcodedesc} + \begin{opcodedesc}{POP_TOP}{} Removes the top-of-stack (TOS) item. \end{opcodedesc} @@ -396,6 +398,10 @@ instruction). 
\end{opcodedesc} +\begin{opcodedesc}{LIST_APPEND}{} +Calls \code{list.append(TOS1, TOS)}. Used to implement list comprehensions. +\end{opcodedesc} + \begin{opcodedesc}{LOAD_LOCALS}{} Pushes a reference to the locals of the current scope on the stack. This is used in the code for a class definition: After the class body Index: libfcntl.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libfcntl.tex,v retrieving revision 1.29.2.2 retrieving revision 1.29.2.3 diff -u -d -r1.29.2.2 -r1.29.2.3 --- libfcntl.tex 7 Jan 2005 06:57:23 -0000 1.29.2.2 +++ libfcntl.tex 16 Oct 2005 05:23:57 -0000 1.29.2.3 @@ -66,8 +66,9 @@ If it is false, the buffer's mutability is ignored and behaviour is as for a read-only buffer, except that the 1024 byte limit mentioned - above is avoided -- so long as the buffer you pass is longer than - what the operating system wants to put there, things should work. + above is avoided -- so long as the buffer you pass is at least as + long as what the operating system wants to put there, things should + work. If \var{mutate_flag} is true, then the buffer is (in effect) passed to the underlying \function{ioctl()} system call, the latter's @@ -165,9 +166,9 @@ better.
\begin{seealso} - \seemodule{os}{The \function{os.open()} function supports locking flags - and is available on a wider variety of platforms than - the \function{lockf()} and \function{flock()} - functions, providing a more platform-independent file - locking facility.} + \seemodule{os}{If the locking flags \constant{O_SHLOCK} and + \constant{O_EXLOCK} are present in the \module{os} module, + the \function{os.open()} function provides a more + platform-independent alternative to the \function{lockf()} + and \function{flock()} functions.} \end{seealso} Index: libfuncs.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libfuncs.tex,v retrieving revision 1.109.2.2 retrieving revision 1.109.2.3 diff -u -d -r1.109.2.2 -r1.109.2.3 --- libfuncs.tex 7 Jan 2005 06:57:23 -0000 1.109.2.2 +++ libfuncs.tex 16 Oct 2005 05:23:57 -0000 1.109.2.3 @@ -60,6 +60,32 @@ complex number, its magnitude is returned. \end{funcdesc} +\begin{funcdesc}{all}{iterable} + Return True if all elements of the \var{iterable} are true. + Equivalent to: + \begin{verbatim} + def all(iterable): + for element in iterable: + if not element: + return False + return True + \end{verbatim} + \versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{any}{iterable} + Return True if any element of the \var{iterable} is true. + Equivalent to: + \begin{verbatim} + def any(iterable): + for element in iterable: + if element: + return True + return False + \end{verbatim} + \versionadded{2.5} +\end{funcdesc} + \begin{funcdesc}{basestring}{} This abstract type is the superclass for \class{str} and \class{unicode}. It cannot be called or instantiated, but it can be used to test whether @@ -271,7 +297,7 @@ consisting of their quotient and remainder when using long division. With mixed operand types, the rules for binary arithmetic operators apply. For plain and long integers, the result is the same as - \code{(\var{a} / \var{b}, \var{a} \%{} \var{b})}. 
+ \code{(\var{a} // \var{b}, \var{a} \%{} \var{b})}. For floating point numbers the result is \code{(\var{q}, \var{a} \%{} \var{b})}, where \var{q} is usually \code{math.floor(\var{a} / \var{b})} but may be 1 less than that. In any case \code{\var{q} * @@ -416,9 +442,12 @@ after any I/O has been performed, and there's no reliable way to determine whether this is the case.} - The \function{file()} constructor is new in Python 2.2. The previous - spelling, \function{open()}, is retained for compatibility, and is an - alias for \function{file()}. + The \function{file()} constructor is new in Python 2.2 and is an + alias for \function{open()}. Both spellings are equivalent. The + intent is for \function{open()} to continue to be preferred for use + as a factory function which returns a new \class{file} object. The + spelling, \class{file} is more suited to type testing (for example, + writing \samp{isinstance(f, file)}). \end{funcdesc} \begin{funcdesc}{filter}{function, list} @@ -654,7 +683,7 @@ \versionchanged[Added support for the optional \var{key} argument]{2.5} \end{funcdesc} -\begin{funcdesc}{min}{s\optional{, args...}} +\begin{funcdesc}{min}{s\optional{, args...}\optional{key}} With a single argument \var{s}, return the smallest item of a non-empty sequence (such as a string, tuple or list). With more than one argument, return the smallest of the arguments. @@ -687,11 +716,16 @@ \end{funcdesc} \begin{funcdesc}{ord}{c} - Return the \ASCII{} value of a string of one character or a Unicode - character. E.g., \code{ord('a')} returns the integer \code{97}, + Given a string of length one, return an integer representing the + Unicode code point of the character when the argument is a unicode object, + or the value of the byte when the argument is an 8-bit string. + For example, \code{ord('a')} returns the integer \code{97}, \code{ord(u'\e u2020')} returns \code{8224}. 
This is the inverse of - \function{chr()} for strings and of \function{unichr()} for Unicode - characters. + \function{chr()} for 8-bit strings and of \function{unichr()} for unicode + objects. If a unicode argument is given and Python was built with + UCS2 Unicode, then the character's code point must be in the range + [0..65535] inclusive; otherwise the string length is two, and a + \exception{TypeError} will be raised. \end{funcdesc} \begin{funcdesc}{pow}{x, y\optional{, z}} @@ -726,6 +760,7 @@ \begin{verbatim} class C(object): + def __init__(self): self.__x = None def getx(self): return self.__x def setx(self, value): self.__x = value def delx(self): del self.__x @@ -745,7 +780,7 @@ \var{start} + 2 * \var{step}, \ldots]}. If \var{step} is positive, the last element is the largest \code{\var{start} + \var{i} * \var{step}} less than \var{stop}; if \var{step} is negative, the last - element is the largest \code{\var{start} + \var{i} * \var{step}} + element is the smallest \code{\var{start} + \var{i} * \var{step}} greater than \var{stop}. \var{step} must not be zero (or else \exception{ValueError} is raised). Example: @@ -1022,33 +1057,38 @@ \begin{funcdesc}{type}{object} Return the type of an \var{object}. The return value is a - type\obindex{type} object. The standard module - \module{types}\refstmodindex{types} defines names for all built-in - types that don't already have built-in names. - For instance: + type\obindex{type} object. The \function{isinstance()} built-in + function is recommended for testing the type of an object. + + With three arguments, \function{type} functions as a constructor + as detailed below. +\end{funcdesc} + +\begin{funcdesc}{type}{name, bases, dict} + Return a new type object. This is essentially a dynamic form of the + \keyword{class} statement. 
The \var{name} string is the class name + and becomes the \member{__name__} attribute; the \var{bases} tuple + itemizes the base classes and becomes the \member{__bases__} + attribute; and the \var{dict} dictionary is the namespace containing + definitions for the class body and becomes the \member{__dict__} + attribute. For example, the following two statements create + identical \class{type} objects: \begin{verbatim} ->>> import types ->>> x = 'abc' ->>> if type(x) is str: print "It's a string" -... -It's a string ->>> def f(): pass -... ->>> if type(f) is types.FunctionType: print "It's a function" -... -It's a function + >>> class X(object): + ... a = 1 + ... + >>> X = type('X', (object,), dict(a=1)) \end{verbatim} - - The \function{isinstance()} built-in function is recommended for - testing the type of an object. +\versionadded{2.2} \end{funcdesc} \begin{funcdesc}{unichr}{i} Return the Unicode string of one character whose Unicode code is the integer \var{i}. For example, \code{unichr(97)} returns the string \code{u'a'}. This is the inverse of \function{ord()} for Unicode - strings. The argument must be in the range [0..65535], inclusive. + strings. The valid range for the argument depends on how Python was + configured -- it may be either UCS2 [0..0xFFFF] or UCS4 [0..0x10FFFF]. \exception{ValueError} is raised otherwise. \versionadded{2.0} \end{funcdesc} @@ -1118,12 +1158,12 @@ that the number of elements fit in a native C long.} \end{funcdesc} -\begin{funcdesc}{zip}{\optional{seq1, \moreargs}} +\begin{funcdesc}{zip}{\optional{iterable, \moreargs}} This function returns a list of tuples, where the \var{i}-th tuple contains - the \var{i}-th element from each of the argument sequences. + the \var{i}-th element from each of the argument sequences or iterables. The returned list is truncated in length to the length of - the shortest argument sequence.
When there are multiple argument - sequences which are all of the same length, \function{zip()} is + the shortest argument sequence. When there are multiple arguments + which are all of the same length, \function{zip()} is similar to \function{map()} with an initial argument of \code{None}. With a single sequence argument, it returns a list of 1-tuples. With no arguments, it returns an empty list. @@ -1142,7 +1182,7 @@ There are several built-in functions that are no longer essential to learn, know or use in modern Python programming. They have been kept here to -maintain backwards compatability with programs written for older versions +maintain backwards compatibility with programs written for older versions of Python. Python programmers, trainers, students and bookwriters should feel free to Index: libgc.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libgc.tex,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- libgc.tex 7 Jan 2005 06:57:23 -0000 1.10.2.2 +++ libgc.tex 16 Oct 2005 05:23:57 -0000 1.10.2.3 @@ -6,11 +6,6 @@ \moduleauthor{Neil Schemenauer}{nas at arctrix.com} \sectionauthor{Neil Schemenauer}{nas at arctrix.com} -The \module{gc} module is only available if the interpreter was built -with the optional cyclic garbage detector (enabled by default). If -this was not enabled, an \exception{ImportError} is raised by attempts -to import this module. - This module provides an interface to the optional garbage collector. It provides the ability to disable the collector, tune the collection frequency, and set debugging options. It also provides access to @@ -19,7 +14,9 @@ can disable the collector if you are sure your program does not create reference cycles. Automatic collection can be disabled by calling \code{gc.disable()}. To debug a leaking program call -\code{gc.set_debug(gc.DEBUG_LEAK)}. +\code{gc.set_debug(gc.DEBUG_LEAK)}. 
Notice that this includes +\code{gc.DEBUG_SAVEALL}, causing garbage-collected objects to be +saved in gc.garbage for inspection. The \module{gc} module provides the following functions: Index: libglob.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libglob.tex,v retrieving revision 1.12.26.1 retrieving revision 1.12.26.2 diff -u -d -r1.12.26.1 -r1.12.26.2 --- libglob.tex 7 Jan 2005 06:57:23 -0000 1.12.26.1 +++ libglob.tex 16 Oct 2005 05:23:57 -0000 1.12.26.2 @@ -16,7 +16,7 @@ \index{filenames!pathname expansion} \begin{funcdesc}{glob}{pathname} -Returns a possibly-empty list of path names that match \var{pathname}, +Return a possibly-empty list of path names that match \var{pathname}, which must be a string containing a path specification. \var{pathname} can be either absolute (like \file{/usr/src/Python-1.5/Makefile}) or relative (like @@ -24,6 +24,12 @@ Broken symlinks are included in the results (as in the shell). \end{funcdesc} +\begin{funcdesc}{iglob}{pathname} +Return an iterator which yields the same values as \function{glob()} +without actually storing them all simultaneously. +\versionadded{2.5} +\end{funcdesc} + For example, consider a directory containing only the following files: \file{1.gif}, \file{2.txt}, and \file{card.gif}. \function{glob()} will produce the following results. 
Notice how any leading components Index: libgrp.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libgrp.tex,v retrieving revision 1.16 retrieving revision 1.16.2.1 diff -u -d -r1.16 -r1.16.2.1 --- libgrp.tex 1 Mar 2002 10:38:44 -0000 1.16 +++ libgrp.tex 16 Oct 2005 05:23:57 -0000 1.16.2.1 @@ -45,4 +45,5 @@ \begin{seealso} \seemodule{pwd}{An interface to the user database, similar to this.} + \seemodule{spwd}{An interface to the shadow password database, similar to this.} \end{seealso} Index: libhmac.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libhmac.tex,v retrieving revision 1.1 retrieving revision 1.1.14.1 diff -u -d -r1.1 -r1.1.14.1 --- libhmac.tex 11 Sep 2001 16:56:09 -0000 1.1 +++ libhmac.tex 16 Oct 2005 05:23:57 -0000 1.1.14.1 @@ -14,8 +14,10 @@ \begin{funcdesc}{new}{key\optional{, msg\optional{, digestmod}}} Return a new hmac object. If \var{msg} is present, the method call \code{update(\var{msg})} is made. \var{digestmod} is the digest - module for the HMAC object to use. It defaults to the - \refmodule{md5} module. + constructor or module for the HMAC object to use. It defaults to + the \code{\refmodule{hashlib}.md5} constructor. \note{The md5 hash + has known weaknesses but remains the default for backwards compatibility. + Choose a better one for your application.} \end{funcdesc} An HMAC object has the following methods: @@ -29,14 +31,14 @@ \begin{methoddesc}[hmac]{digest}{} Return the digest of the strings passed to the \method{update()} - method so far. This is a 16-byte string (for \refmodule{md5}) or a - 20-byte string (for \refmodule{sha}) which may contain non-\ASCII{} - characters, including NUL bytes. + method so far. This string will be the same length as the + \var{digest_size} of the digest given to the constructor. It + may contain non-\ASCII{} characters, including NUL bytes. 
\end{methoddesc} \begin{methoddesc}[hmac]{hexdigest}{} - Like \method{digest()} except the digest is returned as a string of - length 32 for \refmodule{md5} (40 for \refmodule{sha}), containing + Like \method{digest()} except the digest is returned as a string + twice the length containing only hexadecimal digits. This may be used to exchange the value safely in email or other non-binary environments. \end{methoddesc} @@ -46,3 +48,7 @@ efficiently compute the digests of strings that share a common initial substring. \end{methoddesc} + +\begin{seealso} + \seemodule{hashlib}{The python module providing secure hash functions.} +\end{seealso} Index: libhttplib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libhttplib.tex,v retrieving revision 1.31.2.2 retrieving revision 1.31.2.3 diff -u -d -r1.31.2.2 -r1.31.2.3 --- libhttplib.tex 7 Jan 2005 06:57:23 -0000 1.31.2.2 +++ libhttplib.tex 16 Oct 2005 05:23:57 -0000 1.31.2.3 @@ -146,7 +146,7 @@ {http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.1.2}} \lineiii{PROCESSING}{\code{102}} {WEBDAV, \ulink{RFC 2518, Section 10.1} - {http://www.webdav.org/specs/rfc2518.htm#STATUS_102}} + {http://www.webdav.org/specs/rfc2518.html#STATUS_102}} \lineiii{OK}{\code{200}} {HTTP/1.1, \ulink{RFC 2616, Section 10.2.1} @@ -171,7 +171,7 @@ {http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.7}} \lineiii{MULTI_STATUS}{\code{207}} {WEBDAV \ulink{RFC 2518, Section 10.2} - {http://www.webdav.org/specs/rfc2518.htm#STATUS_207}} + {http://www.webdav.org/specs/rfc2518.html#STATUS_207}} \lineiii{IM_USED}{\code{226}} {Delta encoding in HTTP, \rfc{3229}, Section 10.4.1} @@ -253,13 +253,13 @@ {http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.18}} \lineiii{UNPROCESSABLE_ENTITY}{\code{422}} {WEBDAV, \ulink{RFC 2518, Section 10.3} - {http://www.webdav.org/specs/rfc2518.htm#STATUS_422}} + {http://www.webdav.org/specs/rfc2518.html#STATUS_422}} 
\lineiii{LOCKED}{\code{423}} {WEBDAV \ulink{RFC 2518, Section 10.4} - {http://www.webdav.org/specs/rfc2518.htm#STATUS_423}} + {http://www.webdav.org/specs/rfc2518.html#STATUS_423}} \lineiii{FAILED_DEPENDENCY}{\code{424}} {WEBDAV, \ulink{RFC 2518, Section 10.5} - {http://www.webdav.org/specs/rfc2518.htm#STATUS_424}} + {http://www.webdav.org/specs/rfc2518.html#STATUS_424}} \lineiii{UPGRADE_REQUIRED}{\code{426}} {HTTP Upgrade to TLS, \rfc{2817}, Section 6} @@ -283,7 +283,7 @@ {http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.6}} \lineiii{INSUFFICIENT_STORAGE}{\code{507}} {WEBDAV, \ulink{RFC 2518, Section 10.6} - {http://www.webdav.org/specs/rfc2518.htm#STATUS_507}} + {http://www.webdav.org/specs/rfc2518.html#STATUS_507}} \lineiii{NOT_EXTENDED}{\code{510}} {An HTTP Extension Framework, \rfc{2774}, Section 7} \end{tableiii} @@ -304,6 +304,8 @@ \begin{methoddesc}{getresponse}{} Should be called after a request is sent to get the response from the server. Returns an \class{HTTPResponse} instance. +\note{Note that you must have read the whole response before you can send a new +request to the server.} \end{methoddesc} \begin{methoddesc}{set_debuglevel}{level} @@ -320,11 +322,9 @@ Close the connection to the server. \end{methoddesc} -\begin{methoddesc}{send}{data} -Send data to the server. This should be used directly only after the -\method{endheaders()} method has been called and before -\method{getreply()} has been called. -\end{methoddesc} +As an alternative to using the \method{request()} method described above, +you can also send your request step by step, by using the four functions +below. \begin{methoddesc}{putrequest}{request, selector\optional{, skip\_host\optional{, skip_accept_encoding}}} @@ -349,6 +349,11 @@ Send a blank line to the server, signalling the end of the headers. \end{methoddesc} +\begin{methoddesc}{send}{data} +Send data to the server. 
This should be used directly only after the +\method{endheaders()} method has been called and before +\method{getresponse()} is called. +\end{methoddesc} \subsection{HTTPResponse Objects \label{httpresponse-objects}} Index: libimaplib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libimaplib.tex,v retrieving revision 1.20.2.2 retrieving revision 1.20.2.3 diff -u -d -r1.20.2.2 -r1.20.2.3 --- libimaplib.tex 7 Jan 2005 06:57:23 -0000 1.20.2.2 +++ libimaplib.tex 16 Oct 2005 05:23:57 -0000 1.20.2.3 @@ -143,6 +143,13 @@ is the header of the response, and the second part contains the data (ie: 'literal' value). +The \var{message_set} options to commands below is a string specifying one +or more messages to be acted upon. It may be a simple message number +(\code{'1'}), a range of message numbers (\code{'2:4'}), or a group of +non-contiguous ranges separated by commas (\code{'1:3,6:9'}). A range +can contain an asterisk to indicate an infinite upper bound +(\code{'3:*'}). + An \class{IMAP4} instance has the following methods: @@ -215,6 +222,11 @@ The method is non-standard, but is supported by the \samp{Cyrus} server. \end{methoddesc} +\begin{methoddesc}{getannotation}{mailbox, entry, attribute} + Retrieve the specified \samp{ANNOTATION}s for \var{mailbox}. + The method is non-standard, but is supported by the \samp{Cyrus} server. +\end{methoddesc} + \begin{methoddesc}{getquota}{root} Get the \samp{quota} \var{root}'s resource usage and limits. This method is part of the IMAP4 QUOTA extension defined in rfc2087. @@ -315,8 +327,7 @@ \end{methoddesc} \begin{methoddesc}{search}{charset, criterion\optional{, ...}} - Search mailbox for matching messages. Returned data contains a space - separated list of matching message numbers. \var{charset} may be + Search mailbox for matching messages. \var{charset} may be \code{None}, in which case no \samp{CHARSET} will be specified in the request to the server. 
The IMAP protocol requires that at least one criterion be specified; an exception will be raised when the server @@ -326,10 +337,10 @@ \begin{verbatim} # M is a connected IMAP4 instance... -msgnums = M.search(None, 'FROM', '"LDJ"') +typ, msgnums = M.search(None, 'FROM', '"LDJ"') # or: -msgnums = M.search(None, '(FROM "LDJ")') +typ, msgnums = M.search(None, '(FROM "LDJ")') \end{verbatim} \end{methoddesc} @@ -350,6 +361,11 @@ The method is non-standard, but is supported by the \samp{Cyrus} server. \end{methoddesc} +\begin{methoddesc}{setannotation}{mailbox, entry, attribute\optional{, ...}} + Set \samp{ANNOTATION}s for \var{mailbox}. + The method is non-standard, but is supported by the \samp{Cyrus} server. +\end{methoddesc} + \begin{methoddesc}{setquota}{root, limits} Set the \samp{quota} \var{root}'s resource \var{limits}. This method is part of the IMAP4 QUOTA extension defined in rfc2087. @@ -389,7 +405,18 @@ \end{methoddesc} \begin{methoddesc}{store}{message_set, command, flag_list} - Alters flag dispositions for messages in mailbox. + Alters flag dispositions for messages in mailbox. \var{command} is + specified by section 6.4.6 of \rfc{2060} as being one of "FLAGS", "+FLAGS", + or "-FLAGS", optionally with a suffix of ".SILENT". 
+ + For example, to set the delete flag on all messages: + +\begin{verbatim} +typ, data = M.search(None, 'ALL') +for num in data[0].split(): + M.store(num, '+FLAGS', '\\Deleted') +M.expunge() +\end{verbatim} \end{methoddesc} \begin{methoddesc}{subscribe}{mailbox} @@ -473,5 +500,6 @@ for num in data[0].split(): typ, data = M.fetch(num, '(RFC822)') print 'Message %s\n%s\n' % (num, data[0][1]) +M.close() M.logout() \end{verbatim} Index: libimghdr.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libimghdr.tex,v retrieving revision 1.13 retrieving revision 1.13.26.1 diff -u -d -r1.13 -r1.13.26.1 --- libimghdr.tex 10 Oct 2000 17:03:45 -0000 1.13 +++ libimghdr.tex 16 Oct 2005 05:23:57 -0000 1.13.26.1 @@ -31,11 +31,13 @@ \lineii{'tiff'}{TIFF Files} \lineii{'rast'}{Sun Raster Files} \lineii{'xbm'}{X Bitmap Files} - \lineii{'jpeg'}{JPEG data in JFIF format} + \lineii{'jpeg'}{JPEG data in JFIF or Exif formats} \lineii{'bmp'}{BMP files} \lineii{'png'}{Portable Network Graphics} \end{tableii} +\versionadded[Exif detection]{2.5} + You can extend the list of file types \module{imghdr} can recognize by appending to this variable: Index: libimp.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libimp.tex,v retrieving revision 1.32.2.2 retrieving revision 1.32.2.3 diff -u -d -r1.32.2.2 -r1.32.2.3 --- libimp.tex 7 Jan 2005 06:57:24 -0000 1.32.2.2 +++ libimp.tex 16 Oct 2005 05:23:57 -0000 1.32.2.3 @@ -135,8 +135,8 @@ \end{datadesc} \begin{datadesc}{PY_RESOURCE} -The module was found as a Macintosh resource. This value can only be -returned on a Macintosh. +The module was found as a Mac OS 9 resource. This value can only be +returned on a Mac OS 9 or earlier Macintosh. 
\end{datadesc} \begin{datadesc}{PKG_DIRECTORY} Index: libitertools.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libitertools.tex,v retrieving revision 1.7.4.2 retrieving revision 1.7.4.3 diff -u -d -r1.7.4.2 -r1.7.4.3 --- libitertools.tex 7 Jan 2005 06:57:24 -0000 1.7.4.2 +++ libitertools.tex 16 Oct 2005 05:23:57 -0000 1.7.4.3 @@ -252,14 +252,12 @@ \begin{verbatim} def islice(iterable, *args): s = slice(*args) - next, stop, step = s.start or 0, s.stop, s.step or 1 - for cnt, element in enumerate(iterable): - if cnt < next: - continue - if stop is not None and cnt >= stop: - break - yield element - next += step + it = iter(xrange(s.start or 0, s.stop or sys.maxint, s.step or 1)) + nexti = it.next() + for i, element in enumerate(iterable): + if i == nexti: + yield element + nexti = it.next() \end{verbatim} If \var{start} is \code{None}, then iteration starts at zero. @@ -461,26 +459,26 @@ "Returns the nth item" return list(islice(iterable, n, n+1)) -def all(seq, pred=bool): - "Returns True if pred(x) is True for every element in the iterable" +def all(seq, pred=None): + "Returns True if pred(x) is true for every element in the iterable" for elem in ifilterfalse(pred, seq): return False return True -def any(seq, pred=bool): - "Returns True if pred(x) is True for at least one element in the iterable" +def any(seq, pred=None): + "Returns True if pred(x) is true for at least one element in the iterable" for elem in ifilter(pred, seq): return True return False -def no(seq, pred=bool): - "Returns True if pred(x) is False for every element in the iterable" +def no(seq, pred=None): + "Returns True if pred(x) is false for every element in the iterable" for elem in ifilter(pred, seq): return False return True -def quantify(seq, pred=bool): - "Count how many times the predicate is True in the sequence" +def quantify(seq, pred=None): + "Count how many times the predicate is true in the sequence" return 
sum(imap(pred, seq)) def padnone(seq): Index: liblogging.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/liblogging.tex,v retrieving revision 1.8.6.2 retrieving revision 1.8.6.3 diff -u -d -r1.8.6.2 -r1.8.6.3 --- liblogging.tex 7 Jan 2005 06:57:24 -0000 1.8.6.2 +++ liblogging.tex 16 Oct 2005 05:23:57 -0000 1.8.6.3 @@ -92,7 +92,7 @@ \item \class{FileHandler} instances send error messages to disk files. -\item \class{BaseRotatingHandler} is tha base class for handlers that +\item \class{BaseRotatingHandler} is the base class for handlers that rotate log files at a certain point. It is not meant to be instantiated directly. Instead, use \class{RotatingFileHandler} or \class{TimedRotatingFileHandler}. @@ -258,13 +258,32 @@ it as a \class{LogRecord} instance at the receiving end. \end{funcdesc} -\begin{funcdesc}{basicConfig}{} +\begin{funcdesc}{basicConfig}{\optional{**kwargs}} Does basic configuration for the logging system by creating a \class{StreamHandler} with a default \class{Formatter} and adding it to the root logger. The functions \function{debug()}, \function{info()}, \function{warning()}, \function{error()} and \function{critical()} will call \function{basicConfig()} automatically if no handlers are defined for the root logger. + +\versionchanged[Formerly, \function{basicConfig} did not take any keyword +arguments]{2.4} + +The following keyword arguments are supported. 
+ +\begin{tableii}{l|l}{code}{Format}{Description} +\lineii{filename}{Specifies that a FileHandler be created, using the +specified filename, rather than a StreamHandler.} +\lineii{filemode}{Specifies the mode to open the file, if filename is +specified (if filemode is unspecified, it defaults to 'a').} +\lineii{format}{Use the specified format string for the handler.} +\lineii{datefmt}{Use the specified date/time format.} +\lineii{level}{Set the root logger level to the specified level.} +\lineii{stream}{Use the specified stream to initialize the StreamHandler. +Note that this argument is incompatible with 'filename' - if both +are present, 'stream' is ignored.} +\end{tableii} + \end{funcdesc} \begin{funcdesc}{shutdown}{} @@ -315,6 +334,20 @@ to be processed when the logger is the root logger, or delegation to the parent when the logger is a non-root logger). Note that the root logger is created with level \constant{WARNING}. + +The term "delegation to the parent" means that if a logger has a level +of NOTSET, its chain of ancestor loggers is traversed until either an +ancestor with a level other than NOTSET is found, or the root is +reached. + +If an ancestor is found with a level other than NOTSET, then that +ancestor's level is treated as the effective level of the logger where +the ancestor search began, and is used to determine how a logging +event is handled. + +If the root is reached, and it has a level of NOTSET, then all +messages will be processed. Otherwise, the root's level will be used +as the effective level. \end{methoddesc} \begin{methoddesc}{isEnabledFor}{lvl} @@ -415,6 +448,9 @@ \subsection{Basic example \label{minimal-example}} +\versionchanged[formerly \function{basicConfig} did not take any keyword +arguments]{2.4} + The \module{logging} package provides a lot of flexibility, and its configuration can appear daunting. This section demonstrates that simple use of the logging package is possible. 
@@ -1133,8 +1169,7 @@ \begin{classdesc}{SMTPHandler}{mailhost, fromaddr, toaddrs, subject} Returns a new instance of the \class{SMTPHandler} class. The instance is initialized with the from and to addresses and subject -line of the email. The \var{toaddrs} should be a list of strings without -domain names (That's what the \var{mailhost} is for). To specify a +line of the email. The \var{toaddrs} should be a list of strings. To specify a non-standard SMTP port, use the (host, port) tuple format for the \var{mailhost} argument. If you use a string, the standard SMTP port is used. @@ -1269,6 +1304,7 @@ \lineii{\%(msecs)d} {Millisecond portion of the time when the \class{LogRecord} was created.} \lineii{\%(thread)d} {Thread ID (if available).} +\lineii{\%(threadName)s} {Thread name (if available).} \lineii{\%(process)d} {Process ID (if available).} \lineii{\%(message)s} {The logged message, computed as \code{msg \% args}.} \end{tableii} @@ -1396,7 +1432,10 @@ will be sent as a file suitable for processing by \function{fileConfig()}. Returns a \class{Thread} instance on which you can call \method{start()} to start the server, and which you can \method{join()} when appropriate. -To stop the server, call \function{stopListening()}. +To stop the server, call \function{stopListening()}. To send a configuration +to the socket, read in the configuration file and send it to the socket +as a string of bytes preceded by a four-byte length packed in binary using +struct.\code{pack(">L", n)}. 
\end{funcdesc} \begin{funcdesc}{stopListening}{} Index: libmarshal.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libmarshal.tex,v retrieving revision 1.22.10.1 retrieving revision 1.22.10.2 diff -u -d -r1.22.10.1 -r1.22.10.2 --- libmarshal.tex 7 Jan 2005 06:57:24 -0000 1.22.10.1 +++ libmarshal.tex 16 Oct 2005 05:23:57 -0000 1.22.10.2 @@ -109,8 +109,9 @@ \begin{datadesc}{version} Indicates the format that the module uses. Version 0 is the - historical format, version 1 (added in Python 2.4) shares - interned strings. The current version is 1. + historical format, version 1 (added in Python 2.4) shares interned + strings and version 2 (added in Python 2.5) uses a binary format for + floating point numbers. The current version is 2. \versionadded{2.4} -\end{datadesc} \ No newline at end of file +\end{datadesc} Index: libmd5.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libmd5.tex,v retrieving revision 1.21 retrieving revision 1.21.10.1 diff -u -d -r1.21 -r1.21.10.1 --- libmd5.tex 2 Nov 2001 21:44:09 -0000 1.21 +++ libmd5.tex 16 Oct 2005 05:23:57 -0000 1.21.10.1 @@ -4,6 +4,7 @@ \declaremodule{builtin}{md5} \modulesynopsis{RSA's MD5 message digest algorithm.} +\deprecated{2.5}{Use the \refmodule{hashlib} module instead.} This module implements the interface to RSA's MD5 message digest \index{message digest, MD5} Index: libmmap.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libmmap.tex,v retrieving revision 1.8.8.1 retrieving revision 1.8.8.2 diff -u -d -r1.8.8.1 -r1.8.8.2 --- libmmap.tex 7 Jan 2005 06:57:24 -0000 1.8.8.1 +++ libmmap.tex 16 Oct 2005 05:23:57 -0000 1.8.8.2 @@ -35,7 +35,7 @@ taken from the specified file. Assignment to an \constant{ACCESS_READ} memory map raises a \exception{TypeError} exception. 
Assignment to an \constant{ACCESS_WRITE} memory map -affects both memory and the underlying file. Assigment to an +affects both memory and the underlying file. Assignment to an \constant{ACCESS_COPY} memory map affects memory but does not update the underlying file. @@ -62,8 +62,10 @@ prot\optional{, access}}}} \strong{(\UNIX{} version)} Maps \var{length} bytes from the file specified by the file descriptor \var{fileno}, and returns a mmap - object. - + object. If \var{length} is \code{0}, the maximum length of the map + will be the current size of the file when \function{mmap()} is + called. + \var{flags} specifies the nature of the mapping. \constant{MAP_PRIVATE} creates a private copy-on-write mapping, so changes to the contents of the mmap object will be private to this @@ -130,6 +132,7 @@ \end{methoddesc} \begin{methoddesc}{resize}{\var{newsize}} + Resizes the map and the underlying file, if any. If the mmap was created with \constant{ACCESS_READ} or \constant{ACCESS_COPY}, resizing the map will throw a \exception{TypeError} exception. \end{methoddesc} Index: libnew.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libnew.tex,v retrieving revision 1.9.2.1 retrieving revision 1.9.2.2 diff -u -d -r1.9.2.1 -r1.9.2.2 --- libnew.tex 7 Jan 2005 06:57:24 -0000 1.9.2.1 +++ libnew.tex 16 Oct 2005 05:23:57 -0000 1.9.2.2 @@ -47,9 +47,10 @@ %XXX This is still undocumented!!!!!!!!!!! \end{funcdesc} -\begin{funcdesc}{module}{name} +\begin{funcdesc}{module}{name[, doc]} This function returns a new module object with name \var{name}. \var{name} must be a string. +The optional \var{doc} argument can have any type.
\end{funcdesc} \begin{funcdesc}{classobj}{name, baseclasses, dict} Index: libnntplib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libnntplib.tex,v retrieving revision 1.27.12.2 retrieving revision 1.27.12.3 diff -u -d -r1.27.12.2 -r1.27.12.3 --- libnntplib.tex 7 Jan 2005 06:57:24 -0000 1.27.12.2 +++ libnntplib.tex 16 Oct 2005 05:23:57 -0000 1.27.12.3 @@ -156,7 +156,7 @@ Send a \samp{NEWNEWS} command. Here, \var{group} is a group name or \code{'*'}, and \var{date} and \var{time} have the same meaning as for \method{newgroups()}. Return a pair \code{(\var{response}, -\var{articles})} where \var{articles} is a list of article ids. +\var{articles})} where \var{articles} is a list of message ids. If the \var{file} parameter is supplied, then the output of the \samp{NEWNEWS} command is stored in a file. If \var{file} is a string, then the method will open a file object with that name, write to it @@ -228,7 +228,7 @@ in \character{<} and \character{>}) or an article number (as a string). Return a triple \code{(\var{response}, \var{number}, \var{id})} where \var{number} is the article number (as a string) and \var{id} is the -article id (enclosed in \character{<} and \character{>}). +message id (enclosed in \character{<} and \character{>}). \end{methoddesc} \begin{methoddesc}{next}{} @@ -275,7 +275,7 @@ the form \code{'\var{first}-\var{last}'} where \var{first} and \var{last} are the first and last article numbers to search. Return a pair \code{(\var{response}, \var{list})}, where \var{list} is a list of -pairs \code{(\var{id}, \var{text})}, where \var{id} is an article id +pairs \code{(\var{id}, \var{text})}, where \var{id} is an article number (as a string) and \var{text} is the text of the requested header for that article. If the \var{file} parameter is supplied, then the output of the @@ -295,7 +295,9 @@ \end{methoddesc} \begin{methoddesc}{ihave}{id, file} -Send an \samp{IHAVE} command. 
If the response is not an error, treat +Send an \samp{IHAVE} command. \var{id} is a message id (enclosed in +\character{<} and \character{>}). +If the response is not an error, treat \var{file} exactly as for the \method{post()} method. \end{methoddesc} Index: liboperator.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/liboperator.tex,v retrieving revision 1.21.10.2 retrieving revision 1.21.10.3 diff -u -d -r1.21.10.2 -r1.21.10.3 --- liboperator.tex 7 Jan 2005 06:57:24 -0000 1.21.10.2 +++ liboperator.tex 16 Oct 2005 05:23:57 -0000 1.21.10.3 @@ -262,7 +262,8 @@ \begin{funcdesc}{isMappingType}{o} Returns true if the object \var{o} supports the mapping interface. -This is true for dictionaries and all instance objects. +This is true for dictionaries and all instance objects defining +\method{__getitem__}. \warning{There is no reliable way to test if an instance supports the complete mapping protocol since the interface itself is ill-defined. This makes this test less useful than it otherwise might @@ -271,7 +272,7 @@ \begin{funcdesc}{isNumberType}{o} Returns true if the object \var{o} represents a number. This is true -for all numeric types implemented in C, and for all instance objects. +for all numeric types implemented in C. \warning{There is no reliable way to test if an instance supports the complete numeric interface since the interface itself is ill-defined. This makes this test less useful than it otherwise might @@ -281,7 +282,8 @@ \begin{funcdesc}{isSequenceType}{o} Returns true if the object \var{o} supports the sequence protocol. This returns true for all objects which define sequence methods in C, -and for all instance objects. \warning{There is no reliable +and for all instance objects defining \method{__getitem__}. +\warning{There is no reliable way to test if an instance supports the complete sequence interface since the interface itself is ill-defined. 
This makes this test less useful than it otherwise might be.} @@ -289,7 +291,7 @@ Example: Build a dictionary that maps the ordinals from \code{0} to -\code{256} to their character equivalents. +\code{255} to their character equivalents. \begin{verbatim} >>> import operator @@ -306,24 +308,31 @@ \method{itertools.groupby()}, or other functions that expect a function argument. -\begin{funcdesc}{attrgetter}{attr} +\begin{funcdesc}{attrgetter}{attr\optional{, args...}} Return a callable object that fetches \var{attr} from its operand. +If more than one attribute is requested, returns a tuple of attributes. After, \samp{f=attrgetter('name')}, the call \samp{f(b)} returns -\samp{b.name}. +\samp{b.name}. After, \samp{f=attrgetter('name', 'date')}, the call +\samp{f(b)} returns \samp{(b.name, b.date)}. \versionadded{2.4} +\versionchanged[Added support for multiple attributes]{2.5} \end{funcdesc} -\begin{funcdesc}{itemgetter}{item} +\begin{funcdesc}{itemgetter}{item\optional{, args...}} Return a callable object that fetches \var{item} from its operand. +If more than one item is requested, returns a tuple of items. After, \samp{f=itemgetter(2)}, the call \samp{f(b)} returns \samp{b[2]}. +After, \samp{f=itemgetter(2,5,3)}, the call \samp{f(b)} returns +\samp{(b[2], b[5], b[3])}. 
\versionadded{2.4} +\versionchanged[Added support for multiple item extraction]{2.5} \end{funcdesc} Examples: \begin{verbatim} ->>> from operator import * +>>> from operator import itemgetter >>> inventory = [('apple', 3), ('banana', 2), ('pear', 5), ('orange', 1)] >>> getcount = itemgetter(1) >>> map(getcount, inventory) Index: libos.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libos.tex,v retrieving revision 1.91.2.2 retrieving revision 1.91.2.3 diff -u -d -r1.91.2.2 -r1.91.2.3 --- libos.tex 7 Jan 2005 06:57:25 -0000 1.91.2.2 +++ libos.tex 16 Oct 2005 05:23:57 -0000 1.91.2.3 @@ -106,9 +106,15 @@ \code{environ} may cause memory leaks. Refer to the system documentation for \cfunction{putenv()}.} -If \function{putenv()} is not provided, this mapping may be passed to -the appropriate process-creation functions to cause child processes to -use a modified environment. +If \function{putenv()} is not provided, a modified copy of this mapping +may be passed to the appropriate process-creation functions to cause +child processes to use a modified environment. + +If the platform supports the \function{unsetenv()} function, you can +delete items in this mapping to unset environment variables. +\function{unsetenv()} will be called automatically when an item is +deleted from \code{os.environ}. + \end{datadesc} \begin{funcdescni}{chdir}{path} @@ -307,7 +313,19 @@ Availability: recent flavors of \UNIX. \end{funcdesc} +\begin{funcdesc}{unsetenv}{varname} +\index{environment variables!deleting} +Unset (delete) the environment variable named \var{varname}. Such +changes to the environment affect subprocesses started with +\function{os.system()}, \function{popen()} or \function{fork()} and +\function{execv()}. Availability: most flavors of \UNIX, Windows. 
+When \function{unsetenv()} is +supported, deletion of items in \code{os.environ} is automatically +translated into a corresponding call to \function{unsetenv()}; however, +calls to \function{unsetenv()} don't update \code{os.environ}, so it is +actually preferable to delete items of \code{os.environ}. +\end{funcdesc} \subsection{File Object Creation \label{os-newstreams}} @@ -337,7 +355,7 @@ available as the return value of the \method{close()} method of the file object, except that when the exit status is zero (termination without errors), \code{None} is returned. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \versionchanged[This function worked unreliably under Windows in earlier versions of Python. This was due to the use of the @@ -350,11 +368,11 @@ Return a new file object opened in update mode (\samp{w+b}). The file has no directory entries associated with it and will be automatically deleted once there are no file descriptors for the file. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \end{funcdesc} -For each of these \function{popen()} variants, if \var{bufsize} is +For each of the following \function{popen()} variants, if \var{bufsize} is specified, it specifies the buffer size for the I/O pipes. \var{mode}, if provided, should be the string \code{'b'} or \code{'t'}; on Windows this is needed to determine whether the file @@ -366,7 +384,7 @@ intervention (as with \function{os.spawnv()}). If \var{cmd} is a string it will be passed to the shell (as with \function{os.system()}). -These methods do not make it possible to retrieve the return code from +These methods do not make it possible to retrieve the exit status from the child processes. 
The only way to control the input and output streams and also retrieve the return codes is to use the \class{Popen3} and \class{Popen4} classes from the \refmodule{popen2} @@ -380,21 +398,21 @@ \begin{funcdesc}{popen2}{cmd\optional{, mode\optional{, bufsize}}} Executes \var{cmd} as a sub-process. Returns the file objects \code{(\var{child_stdin}, \var{child_stdout})}. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \versionadded{2.0} \end{funcdesc} \begin{funcdesc}{popen3}{cmd\optional{, mode\optional{, bufsize}}} Executes \var{cmd} as a sub-process. Returns the file objects \code{(\var{child_stdin}, \var{child_stdout}, \var{child_stderr})}. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \versionadded{2.0} \end{funcdesc} \begin{funcdesc}{popen4}{cmd\optional{, mode\optional{, bufsize}}} Executes \var{cmd} as a sub-process. Returns the file objects \code{(\var{child_stdin}, \var{child_stdout_and_stderr})}. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \versionadded{2.0} \end{funcdesc} @@ -409,8 +427,15 @@ \subsection{File Descriptor Operations \label{os-fd-ops}} -These functions operate on I/O streams referred to -using file descriptors. +These functions operate on I/O streams referenced using file +descriptors. + +File descriptors are small integers corresponding to a file that has +been opened by the current process. For example, standard input is +usually file descriptor 0, standard output is 1, and standard error is +2. Further files opened by a process will then be assigned 3, 4, 5, +and so forth. The name ``file descriptor'' is slightly deceptive; on +{\UNIX} platforms, sockets and pipes are also referenced by file descriptors. \begin{funcdesc}{close}{fd} @@ -434,7 +459,7 @@ \begin{funcdesc}{dup2}{fd, fd2} Duplicate file descriptor \var{fd} to \var{fd2}, closing the latter first if necessary. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. 
\end{funcdesc} \begin{funcdesc}{fdatasync}{fd} @@ -453,7 +478,7 @@ \code{pathconf_names} dictionary. For configuration variables not included in that mapping, passing an integer for \var{name} is also accepted. -Availability: \UNIX. +Availability: Macintosh, \UNIX. If \var{name} is a string and is not known, \exception{ValueError} is raised. If a specific value for \var{name} is not supported by the @@ -464,7 +489,7 @@ \begin{funcdesc}{fstat}{fd} Return status for file descriptor \var{fd}, like \function{stat()}. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \end{funcdesc} \begin{funcdesc}{fstatvfs}{fd} @@ -482,19 +507,19 @@ \code{\var{f}.flush()}, and then do \code{os.fsync(\var{f}.fileno())}, to ensure that all internal buffers associated with \var{f} are written to disk. -Availability: \UNIX, and Windows starting in 2.2.3. +Availability: Macintosh, \UNIX, and Windows starting in 2.2.3. \end{funcdesc} \begin{funcdesc}{ftruncate}{fd, length} Truncate the file corresponding to file descriptor \var{fd}, so that it is at most \var{length} bytes in size. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{isatty}{fd} Return \code{True} if the file descriptor \var{fd} is open and connected to a tty(-like) device, else \code{False}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{lseek}{fd, pos, how} @@ -531,13 +556,13 @@ \code{(\var{master}, \var{slave})} for the pty and the tty, respectively. For a (slightly) more portable approach, use the \refmodule{pty}\refstmodindex{pty} module. -Availability: Some flavors of \UNIX. +Availability: Macintosh, Some flavors of \UNIX. \end{funcdesc} \begin{funcdesc}{pipe}{} Create a pipe. Return a pair of file descriptors \code{(\var{r}, \var{w})} usable for reading and writing, respectively. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. 
\end{funcdesc} \begin{funcdesc}{read}{fd, n} @@ -560,21 +585,21 @@ \begin{funcdesc}{tcgetpgrp}{fd} Return the process group associated with the terminal given by \var{fd} (an open file descriptor as returned by \function{open()}). -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{tcsetpgrp}{fd, pg} Set the process group associated with the terminal given by \var{fd} (an open file descriptor as returned by \function{open()}) to \var{pg}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{ttyname}{fd} Return a string which specifies the terminal device associated with file-descriptor \var{fd}. If \var{fd} is not associated with a terminal device, an exception is raised. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{write}{fd, str} @@ -594,7 +619,9 @@ The following data items are available for use in constructing the -\var{flags} parameter to the \function{open()} function. +\var{flags} parameter to the \function{open()} function. Some items will +not be available on all platforms. For descriptions of their availability +and use, consult \manpage{open}{2}. \begin{datadesc}{O_RDONLY} \dataline{O_WRONLY} @@ -614,6 +641,8 @@ \dataline{O_NDELAY} \dataline{O_NONBLOCK} \dataline{O_NOCTTY} +\dataline{O_SHLOCK} +\dataline{O_EXLOCK} More options for the \var{flag} argument to the \function{open()} function. Availability: Macintosh, \UNIX. \end{datadesc} @@ -621,7 +650,7 @@ \begin{datadesc}{O_BINARY} Option for the \var{flag} argument to the \function{open()} function. This can be bit-wise OR'd together with those listed above. -Availability: Macintosh, Windows. +Availability: Windows. % XXX need to check on the availability of this one. \end{datadesc} @@ -636,6 +665,15 @@ Availability: Windows. \end{datadesc} +\begin{datadesc}{SEEK_SET} +\dataline{SEEK_CUR} +\dataline{SEEK_END} +Parameters to the \function{lseek()} function.
+Their values are 0, 1, and 2, respectively. +Availability: Windows, Macintosh, \UNIX. +\versionadded{2.5} +\end{datadesc} + \subsection{Files and Directories \label{os-file-dir}} \begin{funcdesc}{access}{path, mode} @@ -648,7 +686,17 @@ test permissions. Return \constant{True} if access is allowed, \constant{False} if not. See the \UNIX{} man page \manpage{access}{2} for more information. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. + +\note{Using \function{access()} to check if a user is authorized to e.g. +open a file before actually doing so using \function{open()} creates a +security hole, because the user might exploit the short time interval +between checking and opening the file to manipulate it.} + +\note{I/O operations may fail even when \function{access()} +indicates that they would succeed, particularly for operations +on network filesystems which may have permissions semantics +beyond the usual \POSIX{} permission-bit model.} \end{funcdesc} \begin{datadesc}{F_OK} @@ -692,13 +740,13 @@ \begin{funcdesc}{getcwdu}{} Return a Unicode object representing the current working directory. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \versionadded{2.3} \end{funcdesc} \begin{funcdesc}{chroot}{path} Change the root directory of the current process to \var{path}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.2} \end{funcdesc} @@ -727,25 +775,30 @@ \item \code{S_IWOTH} \item \code{S_IXOTH} \end{itemize} -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. + +\note{Although Windows supports \function{chmod()}, you can only +set the file's read-only flag with it (via the \code{S_IWRITE} +and \code{S_IREAD} constants or a corresponding integer value). +All other bits are ignored.} \end{funcdesc} \begin{funcdesc}{chown}{path, uid, gid} Change the owner and group id of \var{path} to the numeric \var{uid} -and \var{gid}. -Availability: \UNIX. +and \var{gid}. 
To leave one of the ids unchanged, set it to -1. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{lchown}{path, uid, gid} Change the owner and group id of \var{path} to the numeric \var{uid} and gid. This function will not follow symbolic links. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{funcdesc} \begin{funcdesc}{link}{src, dst} Create a hard link pointing to \var{src} named \var{dst}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{listdir}{path} @@ -755,20 +808,20 @@ directory. Availability: Macintosh, \UNIX, Windows. -\versionchanged[On Windows NT/2k/XP and Unix, if \var{path} is a Unicode +\versionchanged[On Windows NT/2k/XP and \UNIX, if \var{path} is a Unicode object, the result will be a list of Unicode objects.]{2.3} \end{funcdesc} \begin{funcdesc}{lstat}{path} Like \function{stat()}, but do not follow symbolic links. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{mkfifo}{path\optional{, mode}} Create a FIFO (a named pipe) named \var{path} with numeric mode \var{mode}. The default \var{mode} is \code{0666} (octal). The current umask value is first masked out from the mode. -Availability: \UNIX. +Availability: Macintosh, \UNIX. FIFOs are pipes that can be accessed like regular files. FIFOs exist until they are deleted (for example with \function{os.unlink()}). @@ -790,12 +843,14 @@ \end{funcdesc} \begin{funcdesc}{major}{device} -Extracts a device major number from a raw device number. +Extracts the device major number from a raw device number (usually +the \member{st_dev} or \member{st_rdev} field from \ctype{stat}). \versionadded{2.3} \end{funcdesc} \begin{funcdesc}{minor}{device} -Extracts a device minor number from a raw device number. +Extracts the device minor number from a raw device number (usually +the \member{st_dev} or \member{st_rdev} field from \ctype{stat}). 
\versionadded{2.3} \end{funcdesc} @@ -835,7 +890,7 @@ \code{pathconf_names} dictionary. For configuration variables not included in that mapping, passing an integer for \var{name} is also accepted. -Availability: \UNIX. +Availability: Macintosh, \UNIX. If \var{name} is a string and is not known, \exception{ValueError} is raised. If a specific value for \var{name} is not supported by the @@ -849,7 +904,7 @@ \function{fpathconf()} to the integer values defined for those names by the host operating system. This can be used to determine the set of names known to the system. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{datadesc} \begin{funcdesc}{readlink}{path} @@ -857,7 +912,7 @@ points. The result may be either an absolute or relative pathname; if it is relative, it may be converted to an absolute pathname using \code{os.path.join(os.path.dirname(\var{path}), \var{result})}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{remove}{path} @@ -931,18 +986,35 @@ \member{st_mtime} (time of most recent content modification), \member{st_ctime} (platform dependent; time of most recent metadata change on \UNIX, or -the time of creation on Windows). +the time of creation on Windows): + +\begin{verbatim} +>>> import os +>>> statinfo = os.stat('somefile.txt') +>>> statinfo +(33188, 422511L, 769L, 1, 1032, 100, 926L, 1105022698,1105022732, 1105022732) +>>> statinfo.st_size +926L +>>> +\end{verbatim} \versionchanged [If \function{stat_float_times} returns true, the time values are floats, measuring seconds. Fractions of a second may be reported if the system supports that. On Mac OS, the times are always floats. See \function{stat_float_times} for further discussion. 
]{2.3} -On some Unix systems (such as Linux), the following attributes may +On some \UNIX{} systems (such as Linux), the following attributes may also be available: \member{st_blocks} (number of blocks allocated for file), \member{st_blksize} (filesystem blocksize), \member{st_rdev} (type of device if an inode device). +\member{st_flags} (user defined flags for file). + +On other \UNIX{} systems (such as FreeBSD), the following attributes +may be available (but may be only filled out if root tries to +use them): +\member{st_gen} (file generation number), +\member{st_birthtime} (time of file creation). On Mac OS systems, the following attributes may also be available: \member{st_rsize}, @@ -985,6 +1057,7 @@ \versionchanged [Added access to values as attributes of the returned object]{2.2} +\versionchanged[Added st_gen, st_birthtime]{2.5} \end{funcdesc} \begin{funcdesc}{stat_float_times}{\optional{newvalue}} @@ -994,16 +1067,15 @@ the current setting. For compatibility with older Python versions, accessing -\class{stat_result} as a tuple always returns integers. For -compatibility with Python 2.2, accessing the time stamps by field name -also returns integers. Applications that want to determine the -fractions of a second in a time stamp can use this function to have -time stamps represented as floats. Whether they will actually observe -non-zero fractions depends on the system. +\class{stat_result} as a tuple always returns integers. -Future Python releases will change the default of this setting; -applications that cannot deal with floating point time stamps can then -use this function to turn the feature off. +\versionchanged[Python now returns float values by default. Applications +which do not work correctly with floating point time stamps can use +this function to restore the old behaviour]{2.5} + +The resolution of the timestamps (i.e. the smallest possible fraction) +depends on the system.
Some systems only support second resolution; +on these systems, the fraction will always be zero. It is recommended that this setting is only changed at program startup time in the \var{__main__} module; libraries should never change this @@ -1060,8 +1132,8 @@ behavior of this function depends on the C library implementation; some aspects are underspecified in system documentation. \warning{Use of \function{tempnam()} is vulnerable to symlink attacks; -consider using \function{tmpfile()} instead.} -Availability: \UNIX, Windows. +consider using \function{tmpfile()} (section \ref{os-newstreams}) +instead.} Availability: Macintosh, \UNIX, Windows. \end{funcdesc} \begin{funcdesc}{tmpnam}{} @@ -1072,12 +1144,13 @@ paths returned by \function{tmpnam()}; no automatic cleanup is provided. \warning{Use of \function{tmpnam()} is vulnerable to symlink attacks; -consider using \function{tmpfile()} instead.} -Availability: \UNIX, Windows. This function probably shouldn't be used -on Windows, though: Microsoft's implementation of \function{tmpnam()} -always creates a name in the root directory of the current drive, and -that's generally a poor location for a temp file (depending on -privileges, you may not even be able to open a file using this name). +consider using \function{tmpfile()} (section \ref{os-newstreams}) +instead.} Availability: \UNIX, Windows. This function probably +shouldn't be used on Windows, though: Microsoft's implementation of +\function{tmpnam()} always creates a name in the root directory of the +current drive, and that's generally a poor location for a temp file +(depending on privileges, you may not even be able to open a file +using this name). \end{funcdesc} \begin{datadesc}{TMP_MAX} @@ -1220,7 +1293,7 @@ process immediately returns an exit code of \code{3}. Be aware that programs which use \function{signal.signal()} to register a handler for \constant{SIGABRT} will behave differently. -Availability: \UNIX, Windows. 
+Availability: Macintosh, \UNIX, Windows. \end{funcdesc} \begin{funcdesc}{execl}{path, arg0, arg1, \moreargs} @@ -1266,13 +1339,13 @@ \function{execlp()}, \function{execv()}, and \function{execvp()} all cause the new process to inherit the environment of the current process. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \end{funcdesc} \begin{funcdesc}{_exit}{n} Exit to the system with status \var{n}, without calling cleanup handlers, flushing stdio buffers, etc. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \begin{notice} The standard way to exit is \code{sys.exit(\var{n})}. @@ -1285,79 +1358,82 @@ \function{_exit()}, although they are not required. These are typically used for system programs written in Python, such as a mail server's external command delivery program. +\note{Some of these may not be available on all \UNIX{} platforms, +since there is some variation. These constants are defined where they +are defined by the underlying platform.} \begin{datadesc}{EX_OK} Exit code that means no error occurred. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_USAGE} Exit code that means the command was used incorrectly, such as when the wrong number of arguments are given. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_DATAERR} Exit code that means the input data was incorrect. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_NOINPUT} Exit code that means an input file did not exist or was not readable. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_NOUSER} Exit code that means a specified user did not exist. -Availability: \UNIX. +Availability: Macintosh, \UNIX. 
\versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_NOHOST} Exit code that means a specified host did not exist. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_UNAVAILABLE} Exit code that means that a required service is unavailable. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_SOFTWARE} Exit code that means an internal software error was detected. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_OSERR} Exit code that means an operating system error was detected, such as the inability to fork or create a pipe. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_OSFILE} Exit code that means some system file did not exist, could not be opened, or had some other kind of error. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_CANTCREAT} Exit code that means a user specified output file could not be created. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_IOERR} Exit code that means that an error occurred while doing I/O on some file. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} @@ -1365,40 +1441,40 @@ Exit code that means a temporary failure occurred. This indicates something that may not really be an error, such as a network connection that couldn't be made during a retryable operation. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_PROTOCOL} Exit code that means that a protocol exchange was illegal, invalid, or not understood. -Availability: \UNIX. +Availability: Macintosh, \UNIX. 
\versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_NOPERM} Exit code that means that there were insufficient permissions to perform the operation (but not intended for file system problems). -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_CONFIG} Exit code that means that some kind of configuration error occurred. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{datadesc}{EX_NOTFOUND} Exit code that means something like ``an entry was not found''. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} \begin{funcdesc}{fork}{} Fork a child process. Return \code{0} in the child, the child's process id in the parent. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{forkpty}{} @@ -1408,7 +1484,7 @@ in the parent, and \var{fd} is the file descriptor of the master end of the pseudo-terminal. For a more portable approach, use the \refmodule{pty} module. -Availability: Some flavors of \UNIX. +Availability: Macintosh, Some flavors of \UNIX. \end{funcdesc} \begin{funcdesc}{kill}{pid, sig} @@ -1417,27 +1493,27 @@ Kill the process \var{pid} with signal \var{sig}. Constants for the specific signals available on the host platform are defined in the \refmodule{signal} module. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{killpg}{pgid, sig} \index{process!killing} \index{process!signalling} Kill the process group \var{pgid} with the signal \var{sig}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{funcdesc} \begin{funcdesc}{nice}{increment} Add \var{increment} to the process's ``niceness''. Return the new niceness. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{plock}{op} Lock program segments into memory. The value of \var{op} (defined in \code{<sys/lock.h>}) determines which segments are locked.
-Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdescni}{popen}{\unspecified} @@ -1518,7 +1594,7 @@ family of functions. If either of these values is given, the \function{spawn*()} functions will return as soon as the new process has been created, with the process ID as the return value. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \versionadded{1.6} \end{datadesc} @@ -1529,7 +1605,7 @@ has run to completion and will return the exit code of the process the run is successful, or \code{-\var{signal}} if a signal kills the process. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \versionadded{1.6} \end{datadesc} @@ -1583,7 +1659,7 @@ and XP) this is the exit status of the command run; on systems using a non-native shell, consult your shell documentation. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \end{funcdesc} \begin{funcdesc}{times}{} @@ -1594,7 +1670,7 @@ point in the past, in that order. See the \UNIX{} manual page \manpage{times}{2} or the corresponding Windows Platform API documentation. -Availability: \UNIX, Windows. +Availability: Macintosh, \UNIX, Windows. \end{funcdesc} \begin{funcdesc}{wait}{} @@ -1603,7 +1679,7 @@ the signal number that killed the process, and whose high byte is the exit status (if the signal number is zero); the high bit of the low byte is set if a core file was produced. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{waitpid}{pid, options} @@ -1639,9 +1715,10 @@ \end{funcdesc} \begin{datadesc}{WNOHANG} -The option for \function{waitpid()} to avoid hanging if no child -process status is available immediately. -Availability: \UNIX. +The option for \function{waitpid()} to return immediately if no child +process status is available immediately. The function returns +\code{(0, 0)} in this case. +Availability: Macintosh, \UNIX. 
\end{datadesc} \begin{datadesc}{WCONTINUED} @@ -1656,7 +1733,7 @@ This option causes child processes to be reported if they have been stopped but their current state has not been reported since they were stopped. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{datadesc} @@ -1668,7 +1745,7 @@ \begin{funcdesc}{WCOREDUMP}{status} Returns \code{True} if a core dump was generated for the process, otherwise it returns \code{False}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \versionadded{2.3} \end{funcdesc} @@ -1688,30 +1765,30 @@ \begin{funcdesc}{WIFSIGNALED}{status} Returns \code{True} if the process exited due to a signal, otherwise it returns \code{False}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{WIFEXITED}{status} Returns \code{True} if the process exited using the \manpage{exit}{2} system call, otherwise it returns \code{False}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{WEXITSTATUS}{status} If \code{WIFEXITED(\var{status})} is true, return the integer parameter to the \manpage{exit}{2} system call. Otherwise, the return value is meaningless. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{WSTOPSIG}{status} Return the signal which caused the process to stop. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{funcdesc}{WTERMSIG}{status} Return the signal which caused the process to exit. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} @@ -1728,7 +1805,7 @@ \code{confstr_names} dictionary. For configuration variables not included in that mapping, passing an integer for \var{name} is also accepted. -Availability: \UNIX. +Availability: Macintosh, \UNIX. If the configuration value specified by \var{name} isn't defined, the empty string is returned. 
@@ -1744,7 +1821,7 @@ Dictionary mapping names accepted by \function{confstr()} to the integer values defined for those names by the host operating system. This can be used to determine the set of names known to the system. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{datadesc} \begin{funcdesc}{getloadavg}{} @@ -1762,14 +1839,14 @@ parameter for \function{confstr()} apply here as well; the dictionary that provides information on the known names is given by \code{sysconf_names}. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{funcdesc} \begin{datadesc}{sysconf_names} Dictionary mapping names accepted by \function{sysconf()} to the integer values defined for those names by the host operating system. This can be used to determine the set of names known to the system. -Availability: \UNIX. +Availability: Macintosh, \UNIX. \end{datadesc} @@ -1783,21 +1860,21 @@ \begin{datadesc}{curdir} The constant string used by the operating system to refer to the current directory. -For example: \code{'.'} for \POSIX{} or \code{':'} for the Macintosh. +For example: \code{'.'} for \POSIX{} or \code{':'} for Mac OS 9. Also available via \module{os.path}. \end{datadesc} \begin{datadesc}{pardir} The constant string used by the operating system to refer to the parent directory. -For example: \code{'..'} for \POSIX{} or \code{'::'} for the Macintosh. +For example: \code{'..'} for \POSIX{} or \code{'::'} for Mac OS 9. Also available via \module{os.path}. \end{datadesc} \begin{datadesc}{sep} The character used by the operating system to separate pathname components, -for example, \character{/} for \POSIX{} or \character{:} for the -Macintosh. Note that knowing this is not sufficient to be able to +for example, \character{/} for \POSIX{} or \character{:} for +Mac OS 9. Note that knowing this is not sufficient to be able to parse or concatenate pathnames --- use \function{os.path.split()} and \function{os.path.join()} --- but it is occasionally useful. 
Also available via \module{os.path}. @@ -1841,8 +1918,8 @@ \begin{datadesc}{devnull} The file path of the null device. -For example: \code{'/dev/null'} for \POSIX{} or \code{'Dev:Nul'} for the -Macintosh. +For example: \code{'/dev/null'} for \POSIX{} or \code{'Dev:Nul'} for +Mac OS 9. Also available via \module{os.path}. \versionadded{2.4} \end{datadesc} Index: libossaudiodev.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libossaudiodev.tex,v retrieving revision 1.6.4.2 retrieving revision 1.6.4.3 diff -u -d -r1.6.4.2 -r1.6.4.3 --- libossaudiodev.tex 7 Jan 2005 06:57:26 -0000 1.6.4.2 +++ libossaudiodev.tex 16 Oct 2005 05:23:57 -0000 1.6.4.3 @@ -115,7 +115,7 @@ as flexible in all cases. The audio device objects returned by \function{open()} define the -following methods: +following methods and (read-only) attributes: \begin{methoddesc}[audio device]{close}{} Explicitly close the audio device. When you are done writing to or @@ -166,7 +166,7 @@ \begin{methoddesc}[audio device]{getfmts}{} Return a bitmask of the audio output formats supported by the -soundcard. On a typical Linux system, these formats are: +soundcard. 
Some of the formats supported by OSS are: \begin{tableii}{l|l}{constant}{Format}{Description} \lineii{AFMT_MU_LAW} @@ -180,21 +180,22 @@ \lineii{AFMT_U8} {Unsigned, 8-bit audio} \lineii{AFMT_S16_LE} - {Unsigned, 16-bit audio, little-endian byte order (as used by + {Signed, 16-bit audio, little-endian byte order (as used by Intel processors)} \lineii{AFMT_S16_BE} - {Unsigned, 16-bit audio, big-endian byte order (as used by 68k, + {Signed, 16-bit audio, big-endian byte order (as used by 68k, PowerPC, Sparc)} \lineii{AFMT_S8} {Signed, 8 bit audio} \lineii{AFMT_U16_LE} - {Signed, 16-bit little-endian audio} + {Unsigned, 16-bit little-endian audio} \lineii{AFMT_U16_BE} - {Signed, 16-bit big-endian audio} + {Unsigned, 16-bit big-endian audio} \end{tableii} -Most systems support only a subset of these formats. Many devices only -support \constant{AFMT_U8}; the most common format used today is -\constant{AFMT_S16_LE}. +Consult the OSS documentation for a full list of audio formats, and note +that most devices support only a subset of these formats. Some older +devices only support \constant{AFMT_U8}; the most common format used +today is \constant{AFMT_S16_LE}. \end{methoddesc} \begin{methoddesc}[audio device]{setfmt}{format} @@ -289,6 +290,21 @@ buffer to be played without blocking. \end{methoddesc} +Audio device objects also support several read-only attributes: + +\begin{memberdesc}[audio device]{closed}{} +Boolean indicating whether the device has been closed. +\end{memberdesc} + +\begin{memberdesc}[audio device]{name}{} +String containing the name of the device file. +\end{memberdesc} + +\begin{memberdesc}[audio device]{mode}{} +The I/O mode for the file, either \code{"r"}, \code{"rw"}, or \code{"w"}. 
+\end{memberdesc} + + \subsection{Mixer Device Objects \label{mixer-device-objects}} The mixer object provides two file-like methods: Index: libpoplib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libpoplib.tex,v retrieving revision 1.14.8.2 retrieving revision 1.14.8.3 diff -u -d -r1.14.8.2 -r1.14.8.3 --- libpoplib.tex 7 Jan 2005 06:57:26 -0000 1.14.8.2 +++ libpoplib.tex 16 Oct 2005 05:23:57 -0000 1.14.8.3 @@ -108,8 +108,8 @@ \begin{methoddesc}{list}{\optional{which}} Request message list, result is in the form -\code{(\var{response}, ['mesg_num octets', ...])}. If \var{which} is -set, it is the message to list. +\code{(\var{response}, ['mesg_num octets', ...], \var{octets})}. +If \var{which} is set, it is the message to list. \end{methoddesc} \begin{methoddesc}{retr}{which} Index: libposixpath.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libposixpath.tex,v retrieving revision 1.23.2.2 retrieving revision 1.23.2.3 diff -u -d -r1.23.2.2 -r1.23.2.3 --- libposixpath.tex 7 Jan 2005 06:57:26 -0000 1.23.2.2 +++ libposixpath.tex 16 Oct 2005 05:23:57 -0000 1.23.2.3 @@ -55,12 +55,20 @@ \end{funcdesc} \begin{funcdesc}{expanduser}{path} -Return the argument with an initial component of \samp{\~} or -\samp{\~\var{user}} replaced by that \var{user}'s home directory. An -initial \samp{\~{}} is replaced by the environment variable -\envvar{HOME}; an initial \samp{\~\var{user}} is looked up in the -password directory through the built-in module -\refmodule{pwd}\refbimodindex{pwd}. If the expansion fails, or if the +On \UNIX, return the argument with an initial component of \samp{\~} or +\samp{\~\var{user}} replaced by that \var{user}'s home directory. 
+An initial \samp{\~} is replaced by the environment variable +\envvar{HOME} if it is set; otherwise the current user's home directory +is looked up in the password directory through the built-in module +\refmodule{pwd}\refbimodindex{pwd}. +An initial \samp{\~\var{user}} is looked up directly in the +password directory. + +On Windows, only \samp{\~} is supported; it is replaced by the +environment variable \envvar{HOME} or by a combination of +\envvar{HOMEDRIVE} and \envvar{HOMEPATH}. + +If the expansion fails or if the path does not begin with a tilde, the path is returned unchanged. \end{funcdesc} @@ -158,7 +166,7 @@ \begin{funcdesc}{normpath}{path} Normalize a pathname. This collapses redundant separators and -up-level references, e.g. \code{A//B}, \code{A/./B} and +up-level references so that \code{A//B}, \code{A/./B} and \code{A/foo/../B} all become \code{A/B}. It does not normalize the case (use \function{normcase()} for that). On Windows, it converts forward slashes to backward slashes. It should be understood that this may @@ -226,6 +234,15 @@ at most one period. \end{funcdesc} +\begin{funcdesc}{splitunc}{path} +Split the pathname \var{path} into a pair \code{(\var{unc}, \var{rest})} +so that \var{unc} is the UNC mount point (such as \code{r'\e\e host\e mount'}), +if present, and \var{rest} the rest of the path (such as +\code{r'\e path\e file.ext'}). For paths containing drive letters, \var{unc} +will always be the empty string. +Availability: Windows. +\end{funcdesc} + \begin{funcdesc}{walk}{path, visit, arg} Calls the function \var{visit} with arguments \code{(\var{arg}, \var{dirname}, \var{names})} for each directory in the @@ -234,7 +251,7 @@ directory, the argument \var{names} lists the files in the directory (gotten from \code{os.listdir(\var{dirname})}). The \var{visit} function may modify \var{names} to -influence the set of directories visited below \var{dirname}, e.g., to +influence the set of directories visited below \var{dirname}, e.g. 
to avoid visiting certain parts of the tree. (The object referred to by \var{names} must be modified in place, using \keyword{del} or slice assignment.) Index: libpwd.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libpwd.tex,v retrieving revision 1.14 retrieving revision 1.14.2.1 diff -u -d -r1.14 -r1.14.2.1 --- libpwd.tex 1 Mar 2002 10:47:36 -0000 1.14 +++ libpwd.tex 16 Oct 2005 05:23:57 -0000 1.14.2.1 @@ -29,9 +29,12 @@ contains a password encrypted with a DES derived algorithm (see module \refmodule{crypt}\refbimodindex{crypt}). However most modern unices use a so-called \emph{shadow password} system. On those unices the -field \code{pw_passwd} only contains a asterisk (\code{'*'}) or the +\var{pw_passwd} field only contains an asterisk (\code{'*'}) or the letter \character{x} where the encrypted password is stored in a file -\file{/etc/shadow} which is not world readable.} +\file{/etc/shadow} which is not world readable. Whether the \var{pw_passwd} +field contains anything useful is system-dependent. If available, the +\module{spwd} module should be used where access to the encrypted password +is required.} It defines the following items: @@ -50,4 +53,5 @@ \begin{seealso} \seemodule{grp}{An interface to the group database, similar to this.} + \seemodule{spwd}{An interface to the shadow password database, similar to this.} \end{seealso} Index: librandom.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/librandom.tex,v retrieving revision 1.30.2.2 retrieving revision 1.30.2.3 diff -u -d -r1.30.2.2 -r1.30.2.3 --- librandom.tex 7 Jan 2005 06:57:27 -0000 1.30.2.2 +++ librandom.tex 16 Oct 2005 05:23:57 -0000 1.30.2.3 @@ -159,8 +159,8 @@ population contains repeats, then each occurrence is a possible selection in the sample. - To choose a sample from a range of integers, use \function{xrange} - as an argument. 
This is especially fast and space efficient for + To choose a sample from a range of integers, use an \function{xrange()} + object as an argument. This is especially fast and space efficient for sampling from a large population: \code{sample(xrange(10000000), 60)}. \end{funcdesc} @@ -236,7 +236,7 @@ \begin{classdesc}{WichmannHill}{\optional{seed}} Class that implements the Wichmann-Hill algorithm as the core generator. -Has all of the same methods as \class{Random} plus the \method{whseed} +Has all of the same methods as \class{Random} plus the \method{whseed()} method described below. Because this class is implemented in pure Python, it is not threadsafe and may require locks between calls. The period of the generator is 6,953,607,871,644 which is small enough to @@ -246,7 +246,7 @@ \begin{funcdesc}{whseed}{\optional{x}} This is obsolete, supplied for bit-level compatibility with versions of Python prior to 2.1. - See \function{seed} for details. \function{whseed} does not guarantee + See \function{seed()} for details. \function{whseed()} does not guarantee that distinct integer arguments yield distinct internal states, and can yield no more than about 2**24 distinct internal states in all. \end{funcdesc} Index: libre.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libre.tex,v retrieving revision 1.84.2.2 retrieving revision 1.84.2.3 diff -u -d -r1.84.2.2 -r1.84.2.3 --- libre.tex 7 Jan 2005 06:57:27 -0000 1.84.2.2 +++ libre.tex 16 Oct 2005 05:23:57 -0000 1.84.2.3 @@ -342,17 +342,33 @@ at the beginning or end of a word. This is just the opposite of {}\code{\e b}, so is also subject to the settings of \code{LOCALE} and \code{UNICODE}. -\item[\code{\e d}]Matches any decimal digit; this is -equivalent to the set \regexp{[0-9]}. +\item[\code{\e d}]When the \constant{UNICODE} flag is not specified, matches +any decimal digit; this is equivalent to the set \regexp{[0-9]}. 
+With \constant{UNICODE}, it will match whatever is classified as a digit +in the Unicode character properties database. -\item[\code{\e D}]Matches any non-digit character; this is -equivalent to the set \regexp{[{\textasciicircum}0-9]}. +\item[\code{\e D}]When the \constant{UNICODE} flag is not specified, matches +any non-digit character; this is equivalent to the set +\regexp{[{\textasciicircum}0-9]}. With \constant{UNICODE}, it will match +anything other than characters marked as digits in the Unicode character +properties database. -\item[\code{\e s}]Matches any whitespace character; this is +\item[\code{\e s}]When the \constant{LOCALE} and \constant{UNICODE} +flags are not specified, matches any whitespace character; this is equivalent to the set \regexp{[ \e t\e n\e r\e f\e v]}. +With \constant{LOCALE}, it will match this set plus whatever characters +are defined as space for the current locale. If \constant{UNICODE} is set, +this will match the characters \regexp{[ \e t\e n\e r\e f\e v]} plus +whatever is classified as space in the Unicode character properties +database. -\item[\code{\e S}]Matches any non-whitespace character; this is -equivalent to the set \regexp{[\textasciicircum\ \e t\e n\e r\e f\e v]}. +\item[\code{\e S}]When the \constant{LOCALE} and \constant{UNICODE} +flags are not specified, matches any non-whitespace character; this is +equivalent to the set \regexp{[\textasciicircum\ \e t\e n\e r\e f\e v]}. +With \constant{LOCALE}, it will match any character not in this set, +and not defined as space in the current locale. If \constant{UNICODE} +is set, this will match anything other than \regexp{[ \e t\e n\e r\e f\e v]} +and characters marked as space in the Unicode character properties database. 
\item[\code{\e w}]When the \constant{LOCALE} and \constant{UNICODE} flags are not specified, matches any alphanumeric character and the @@ -468,8 +484,8 @@ \begin{datadesc}{L} \dataline{LOCALE} -Make \regexp{\e w}, \regexp{\e W}, \regexp{\e b}, and -\regexp{\e B} dependent on the current locale. +Make \regexp{\e w}, \regexp{\e W}, \regexp{\e b}, \regexp{\e B}, +\regexp{\e s} and \regexp{\e S} dependent on the current locale. \end{datadesc} \begin{datadesc}{M} @@ -493,8 +509,9 @@ \begin{datadesc}{U} \dataline{UNICODE} -Make \regexp{\e w}, \regexp{\e W}, \regexp{\e b}, and -\regexp{\e B} dependent on the Unicode character properties database. +Make \regexp{\e w}, \regexp{\e W}, \regexp{\e b}, \regexp{\e B}, +\regexp{\e d}, \regexp{\e D}, \regexp{\e s} and \regexp{\e S} +dependent on the Unicode character properties database. \versionadded{2.0} \end{datadesc} Index: libsets.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsets.tex,v retrieving revision 1.11.6.2 retrieving revision 1.11.6.3 diff -u -d -r1.11.6.2 -r1.11.6.3 --- libsets.tex 7 Jan 2005 06:57:27 -0000 1.11.6.2 +++ libsets.tex 16 Oct 2005 05:23:57 -0000 1.11.6.3 @@ -30,7 +30,10 @@ abstract class useful for determining whether something is a set: \code{isinstance(\var{obj}, BaseSet)}. -The set classes are implemented using dictionaries. As a result, sets +The set classes are implemented using dictionaries. Accordingly, the +requirements for set elements are the same as those for dictionary keys; +namely, that the element defines both \method{__eq__} and \method{__hash__}. +As a result, sets cannot contain mutable elements such as lists or dictionaries. However, they can contain immutable collections such as tuples or instances of \class{ImmutableSet}. 
For convenience in implementing @@ -79,7 +82,7 @@ {test whether every element in \var{t} is in \var{s}} \hline - \lineiii{\var{s}.union(\var{t})}{\var{s} | \var{t}} + \lineiii{\var{s}.union(\var{t})}{\var{s} \textbar{} \var{t}} {new set with elements from both \var{s} and \var{t}} \lineiii{\var{s}.intersection(\var{t})}{\var{s} \&\ \var{t}} {new set with elements common to \var{s} and \var{t}} @@ -130,8 +133,8 @@ but not found in \class{ImmutableSet}: \begin{tableiii}{c|c|l}{code}{Operation}{Equivalent}{Result} - \lineiii{\var{s}.union_update(\var{t})} - {\var{s} |= \var{t}} + \lineiii{\var{s}.update(\var{t})} + {\var{s} \textbar= \var{t}} {return set \var{s} with elements added from \var{t}} \lineiii{\var{s}.intersection_update(\var{t})} {\var{s} \&= \var{t}} @@ -158,12 +161,17 @@ {remove all elements from set \var{s}} \end{tableiii} -Note, the non-operator versions of \method{union_update()}, +Note, the non-operator versions of \method{update()}, \method{intersection_update()}, \method{difference_update()}, and \method{symmetric_difference_update()} will accept any iterable as an argument. \versionchanged[Formerly all arguments were required to be sets]{2.3.1} +Also note, the module includes a \method{union_update()} method +which is an alias for \method{update()}. The method is included for +backwards compatibility. Programmers should prefer the +\method{update()} method because it is the one supported by the builtin +\class{set()} and \class{frozenset()} types. \subsection{Example \label{set-example}} @@ -228,3 +236,28 @@ where one thread is updating a set while another has temporarily wrapped it in \class{_TemporarilyImmutableSet}. In other words, sets of mutable sets are not thread-safe. + + +\subsection{Comparison to the built-in \class{set} types + \label{comparison-to-builtin-set}} + +The built-in \class{set} and \class{frozenset} types were designed based +on lessons learned from the \module{sets} module. 
The key differences are: + +\begin{itemize} +\item \class{Set} and \class{ImmutableSet} were renamed to \class{set} and + \class{frozenset}. +\item There is no equivalent to \class{BaseSet}. Instead, use + \code{isinstance(x, (set, frozenset))}. +\item The hash algorithm for the built-ins performs significantly better + (fewer collisions) for most datasets. +\item The built-in versions have more space efficient pickles. +\item The built-in versions do not have a \method{union_update()} method. + Instead, use the \method{update()} method which is equivalent. +\item The built-in versions do not have a \method{_repr(sorted=True)} method. + Instead, use the built-in \function{repr()} and \function{sorted()} + functions: \code{repr(sorted(s))}. +\item The built-in version does not have a protocol for automatic conversion + to immutable. Many found this feature to be confusing and no one + in the community reported having found real uses for it. +\end{itemize} Index: libsha.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsha.tex,v retrieving revision 1.10.10.1 retrieving revision 1.10.10.2 diff -u -d -r1.10.10.1 -r1.10.10.2 --- libsha.tex 7 Jan 2005 06:57:27 -0000 1.10.10.1 +++ libsha.tex 16 Oct 2005 05:23:58 -0000 1.10.10.2 @@ -5,6 +5,8 @@ \modulesynopsis{NIST's secure hash algorithm, SHA.} \sectionauthor{Fred L. Drake, Jr.}{fdrake at acm.org} +\deprecated{2.5}{Use the \refmodule{hashlib} module instead.} + This module implements the interface to NIST's\index{NIST} secure hash algorithm,\index{Secure Hash Algorithm} known as SHA-1. 
SHA-1 is an Index: libshelve.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libshelve.tex,v retrieving revision 1.14.26.2 retrieving revision 1.14.26.3 diff -u -d -r1.14.26.2 -r1.14.26.3 --- libshelve.tex 7 Jan 2005 06:57:27 -0000 1.14.26.2 +++ libshelve.tex 16 Oct 2005 05:23:58 -0000 1.14.26.3 @@ -42,6 +42,14 @@ the transition from dictionary based scripts to those requiring persistent storage. +One additional method is supported: +\begin{methoddesc}[Shelf]{sync}{} +Write back all entries in the cache if the shelf was opened with +\var{writeback} set to \var{True}. Also empty the cache and synchronize +the persistent dictionary on disk, if feasible. This is called automatically +when the shelf is closed with \method{close()}. +\end{methoddesc} + \subsection{Restrictions} \begin{itemize} Index: libshlex.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libshlex.tex,v retrieving revision 1.12.20.2 retrieving revision 1.12.20.3 diff -u -d -r1.12.20.2 -r1.12.20.3 --- libshlex.tex 7 Jan 2005 06:57:27 -0000 1.12.20.2 +++ libshlex.tex 16 Oct 2005 05:23:58 -0000 1.12.20.3 @@ -15,6 +15,8 @@ be useful for writing minilanguages, (for example, in run control files for Python applications) or for parsing quoted strings. 
+\note{The \module{shlex} module currently does not support Unicode input.} + The \module{shlex} module defines the following functions: \begin{funcdesc}{split}{s\optional{, comments}} Index: libshutil.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libshutil.tex,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- libshutil.tex 7 Jan 2005 06:57:27 -0000 1.10.2.2 +++ libshutil.tex 16 Oct 2005 05:23:58 -0000 1.10.2.3 @@ -67,8 +67,10 @@ \begin{funcdesc}{copytree}{src, dst\optional{, symlinks}} Recursively copy an entire directory tree rooted at \var{src}. The destination directory, named by \var{dst}, must not already exist; - it will be created. Individual files are copied using - \function{copy2()}. If \var{symlinks} is true, symbolic links in + it will be created as well as missing parent directories. + Permissions and times of directories are copied with \function{copystat()}, + individual files are copied using \function{copy2()}. + If \var{symlinks} is true, symbolic links in the source tree are represented as symbolic links in the new tree; if false or omitted, the contents of the linked files are copied to the new tree. If exception(s) occur, an Error is raised @@ -76,8 +78,14 @@ The source code for this should be considered an example rather than a tool. -\versionchanged[Error is raised if any exceptions occur during copying, -rather than printing a message]{2.3} + + \versionchanged[Error is raised if any exceptions occur during copying, + rather than printing a message]{2.3} + + \versionchanged[Create intermediate directories needed to create \var{dst}, + rather than raising an error. 
Copy permissions and times of directories using + \function{copystat()}]{2.5} + \end{funcdesc} \begin{funcdesc}{rmtree}{path\optional{, ignore_errors\optional{, onerror}}} Index: libsimplexmlrpc.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsimplexmlrpc.tex,v retrieving revision 1.3.8.2 retrieving revision 1.3.8.3 diff -u -d -r1.3.8.2 -r1.3.8.3 --- libsimplexmlrpc.tex 7 Jan 2005 06:57:27 -0000 1.3.8.2 +++ libsimplexmlrpc.tex 16 Oct 2005 05:23:58 -0000 1.3.8.3 @@ -55,24 +55,39 @@ period character. \end{methoddesc} -\begin{methoddesc}[SimpleXMLRPCServer]{register_instance}{instance} +\begin{methoddesc}[SimpleXMLRPCServer]{register_instance}{instance\optional{, + allow_dotted_names}} Register an object which is used to expose method names which have not been registered using \method{register_function()}. If \var{instance} contains a \method{_dispatch()} method, it is called with the requested method name and the parameters from the request. Its - API is \code{def \method{_dispatch}(self, method, params)} (note tha + API is \code{def \method{_dispatch}(self, method, params)} (note that \var{params} does not represent a variable argument list). If it calls an underlying function to perform its task, that function is called as \code{func(*params)}, expanding the parameter list. The return value from \method{_dispatch()} is returned to the client as the result. If \var{instance} does not have a \method{_dispatch()} method, it is - searched for an attribute matching the name of the requested method; + searched for an attribute matching the name of the requested method. + + If the optional \var{allow_dotted_names} argument is true and the + instance does not have a \method{_dispatch()} method, then if the requested method name contains periods, each component of the method name is searched for individually, with the effect that a simple hierarchical search is performed. 
The value found from this search is then called with the parameters from the request, and the return value is passed back to the client. + + \begin{notice}[warning] + Enabling the \var{allow_dotted_names} option allows intruders to access + your module's global variables and may allow intruders to execute + arbitrary code on your machine. Only use this option on a secure, + closed network. + \end{notice} + + \versionchanged[\var{allow_dotted_names} was added to plug a security hole; + prior versions are insecure]{2.3.5, 2.4.1} + \end{methoddesc} \begin{methoddesc}{register_introspection_functions}{} Index: libsite.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsite.tex,v retrieving revision 1.23.2.2 retrieving revision 1.23.2.3 diff -u -d -r1.23.2.2 -r1.23.2.3 --- libsite.tex 7 Jan 2005 06:57:27 -0000 1.23.2.2 +++ libsite.tex 16 Oct 2005 05:23:58 -0000 1.23.2.3 @@ -16,9 +16,9 @@ It starts by constructing up to four directories from a head and a tail part. For the head part, it uses \code{sys.prefix} and \code{sys.exec_prefix}; empty heads are skipped. For -the tail part, it uses the empty string (on Macintosh or Windows) or -it uses first \file{lib/python\shortversion/site-packages} and then -\file{lib/site-python} (on \UNIX). For each of the distinct +the tail part, it uses the empty string (on Windows) or +\file{lib/python\shortversion/site-packages} (on \UNIX{} and Macintosh) +and then \file{lib/site-python}. For each of the distinct head-tail combinations, it sees if it refers to an existing directory, and if so, adds it to \code{sys.path} and also inspects the newly added path for configuration files. 
Index: libsmtplib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsmtplib.tex,v retrieving revision 1.22.2.2 retrieving revision 1.22.2.3 diff -u -d -r1.22.2.2 -r1.22.2.3 --- libsmtplib.tex 7 Jan 2005 06:57:27 -0000 1.22.2.2 +++ libsmtplib.tex 16 Oct 2005 05:23:58 -0000 1.22.2.3 @@ -190,13 +190,14 @@ \begin{methoddesc}{sendmail}{from_addr, to_addrs, msg\optional{, mail_options, rcpt_options}} Send mail. The required arguments are an \rfc{822} from-address -string, a list of \rfc{822} to-address strings, and a message string. -The caller may pass a list of ESMTP options (such as \samp{8bitmime}) -to be used in \samp{MAIL FROM} commands as \var{mail_options}. ESMTP -options (such as \samp{DSN} commands) that should be used with all -\samp{RCPT} commands can be passed as \var{rcpt_options}. (If you -need to use different ESMTP options to different recipients you have -to use the low-level methods such as \method{mail}, \method{rcpt} and +string, a list of \rfc{822} to-address strings (a bare string will be +treated as a list with 1 address), and a message string. The caller +may pass a list of ESMTP options (such as \samp{8bitmime}) to be used +in \samp{MAIL FROM} commands as \var{mail_options}. ESMTP options +(such as \samp{DSN} commands) that should be used with all \samp{RCPT} +commands can be passed as \var{rcpt_options}. (If you need to use +different ESMTP options to different recipients you have to use the +low-level methods such as \method{mail}, \method{rcpt} and \method{data} to send the message.) 
\note{The \var{from_addr} and \var{to_addrs} parameters are Index: libsocket.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsocket.tex,v retrieving revision 1.68.2.2 retrieving revision 1.68.2.3 diff -u -d -r1.68.2.2 -r1.68.2.3 --- libsocket.tex 7 Jan 2005 06:57:27 -0000 1.68.2.2 +++ libsocket.tex 16 Oct 2005 05:23:58 -0000 1.68.2.3 @@ -7,7 +7,8 @@ This module provides access to the BSD \emph{socket} interface. It is available on all modern \UNIX{} systems, Windows, MacOS, BeOS, -OS/2, and probably additional platforms. +OS/2, and probably additional platforms. \note{Some behavior may be +platform dependent, since calls are made to the operating system socket APIs.} For an introduction to socket programming (in C), see the following papers: \citetitle{An Introductory 4.3BSD Interprocess Communication @@ -202,8 +203,8 @@ host. To find the fully qualified name, the hostname returned by \function{gethostbyaddr()} is checked, then aliases for the host, if available. The first name which includes a period is selected. In -case no fully qualified domain name is available, the hostname is -returned. +case no fully qualified domain name is available, the hostname as +returned by \function{gethostname()} is returned. 
\versionadded{2.0} \end{funcdesc} Index: libsocksvr.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsocksvr.tex,v retrieving revision 1.14.24.2 retrieving revision 1.14.24.3 diff -u -d -r1.14.24.2 -r1.14.24.3 --- libsocksvr.tex 7 Jan 2005 06:57:27 -0000 1.14.24.2 +++ libsocksvr.tex 16 Oct 2005 05:23:58 -0000 1.14.24.3 @@ -52,10 +52,86 @@ \setindexsubitem{(SocketServer protocol)} +\subsection{Server Creation Notes} + +There are five classes in an inheritance diagram, four of which represent +synchronous servers of four types: + +\begin{verbatim} + +------------+ + | BaseServer | + +------------+ + | + v + +-----------+ +------------------+ + | TCPServer |------->| UnixStreamServer | + +-----------+ +------------------+ + | + v + +-----------+ +--------------------+ + | UDPServer |------->| UnixDatagramServer | + +-----------+ +--------------------+ +\end{verbatim} + +Note that \class{UnixDatagramServer} derives from \class{UDPServer}, not +from \class{UnixStreamServer} -- the only difference between an IP and a +Unix stream server is the address family, which is simply repeated in both +unix server classes. + +Forking and threading versions of each type of server can be created using +the \class{ForkingMixIn} and \class{ThreadingMixIn} mix-in classes. For +instance, a threading UDP server class is created as follows: + +\begin{verbatim} + class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass +\end{verbatim} + +The mix-in class must come first, since it overrides a method defined in +\class{UDPServer}. Setting the various member variables also changes the +behavior of the underlying server mechanism. + +To implement a service, you must derive a class from +\class{BaseRequestHandler} and redefine its \method{handle()} method. You +can then run various versions of the service by combining one of the server +classes with your request handler class. 
The request handler class must be +different for datagram or stream services. This can be hidden by using the +handler subclasses \class{StreamRequestHandler} or \class{DatagramRequestHandler}. + +Of course, you still have to use your head! For instance, it makes no sense +to use a forking server if the service contains state in memory that can be +modified by different requests, since the modifications in the child process +would never reach the initial state kept in the parent process and passed to +each child. In this case, you can use a threading server, but you will +probably have to use locks to protect the integrity of the shared data. + +On the other hand, if you are building an HTTP server where all data is +stored externally (for instance, in the file system), a synchronous class +will essentially render the service "deaf" while one request is being +handled -- which may be for a very long time if a client is slow to receive +all the data it has requested. Here a threading or forking server is +appropriate. + +In some cases, it may be appropriate to process part of a request +synchronously, but to finish processing in a forked child depending on the +request data. This can be implemented by using a synchronous server and +doing an explicit fork in the request handler class \method{handle()} +method. + +Another approach to handling multiple simultaneous requests in an +environment that supports neither threads nor \function{fork()} (or where +these are too expensive or inappropriate for the service) is to maintain an +explicit table of partially finished requests and to use \function{select()} +to decide which request to work on next (or whether to handle a new incoming +request). This is particularly important for stream services where each +client can potentially be connected for a long time (if threads or +subprocesses cannot be used). + %XXX should data and methods be intermingled, or separate? 
% how should the distinction between class and instance variables be % drawn? +\subsection{Server Objects} + \begin{funcdesc}{fileno}{} Return an integer file descriptor for the socket on which the server is listening. This function is most commonly passed to @@ -160,7 +236,8 @@ % instance variables, adding new network families? \begin{funcdesc}{server_activate}{} -Called by the server's constructor to activate the server. +Called by the server's constructor to activate the server. The default +behavior just \method{listen}s to the server's socket. May be overridden. \end{funcdesc} @@ -176,6 +253,8 @@ The default implementation always returns \constant{True}. \end{funcdesc} +\subsection{RequestHandler Objects} + The request handler class must define a new \method{handle()} method, and can override any of the following methods. A new instance is created for each request. @@ -189,6 +268,7 @@ \begin{funcdesc}{handle}{} This function must do all the work required to service a request. +The default implementation does nothing. Several instance attributes are available to it; the request is available as \member{self.request}; the client address as \member{self.client_address}; and the server instance as @@ -198,11 +278,10 @@ The type of \member{self.request} is different for datagram or stream services. For stream services, \member{self.request} is a socket object; for datagram services, \member{self.request} is a string. -However, this can be hidden by using the mix-in request handler -classes +However, this can be hidden by using the request handler subclasses \class{StreamRequestHandler} or \class{DatagramRequestHandler}, which override the \method{setup()} and \method{finish()} methods, and -provides \member{self.rfile} and \member{self.wfile} attributes. +provide \member{self.rfile} and \member{self.wfile} attributes. \member{self.rfile} and \member{self.wfile} can be read or written, respectively, to get the request data or return data to the client. 
\end{funcdesc} Index: libstdtypes.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libstdtypes.tex,v retrieving revision 1.98.2.2 retrieving revision 1.98.2.3 diff -u -d -r1.98.2.2 -r1.98.2.3 --- libstdtypes.tex 7 Jan 2005 06:57:27 -0000 1.98.2.2 +++ libstdtypes.tex 16 Oct 2005 05:23:58 -0000 1.98.2.3 @@ -72,7 +72,9 @@ \index{False} \index{True} -\subsection{Boolean Operations \label{boolean}} +\subsection{Boolean Operations --- + \keyword{and}, \keyword{or}, \keyword{not} + \label{boolean}} These are the Boolean operations, ordered by ascending priority: \indexii{Boolean}{operations} @@ -173,7 +175,9 @@ only by sequence types (below). -\subsection{Numeric Types \label{typesnumeric}} +\subsection{Numeric Types --- + \class{int}, \class{float}, \class{long}, \class{complex} + \label{typesnumeric}} There are four distinct numeric types: \dfn{plain integers}, \dfn{long integers}, @@ -256,7 +260,7 @@ \lineiii{float(\var{x})}{\var{x} converted to floating point}{} \lineiii{complex(\var{re},\var{im})}{a complex number with real part \var{re}, imaginary part \var{im}. \var{im} defaults to zero.}{} \lineiii{\var{c}.conjugate()}{conjugate of the complex number \var{c}}{} - \lineiii{divmod(\var{x}, \var{y})}{the pair \code{(\var{x} / \var{y}, \var{x} \%{} \var{y})}}{(3)(4)} + \lineiii{divmod(\var{x}, \var{y})}{the pair \code{(\var{x} // \var{y}, \var{x} \%{} \var{y})}}{(3)(4)} \lineiii{pow(\var{x}, \var{y})}{\var{x} to the power \var{y}}{} \lineiii{\var{x} ** \var{y}}{\var{x} to the power \var{y}}{} \end{tableiii} @@ -405,7 +409,10 @@ supplying the \method{__iter__()} and \method{next()} methods. -\subsection{Sequence Types \label{typesseq}} +\subsection{Sequence Types --- + \class{str}, \class{unicode}, \class{list}, + \class{tuple}, \class{buffer}, \class{xrange} + \label{typesseq}} There are six sequence types: strings, Unicode strings, lists, tuples, buffers, and xrange objects. 
@@ -699,11 +706,17 @@ \end{methoddesc} \begin{methoddesc}[string]{lstrip}{\optional{chars}} -Return a copy of the string with leading characters removed. If -\var{chars} is omitted or \code{None}, whitespace characters are -removed. If given and not \code{None}, \var{chars} must be a string; -the characters in the string will be stripped from the beginning of -the string this method is called on. +Return a copy of the string with leading characters removed. The +\var{chars} argument is a string specifying the set of characters +to be removed. If omitted or \code{None}, the \var{chars} argument +defaults to removing whitespace. The \var{chars} argument is not +a prefix; rather, all combinations of its values are stripped: +\begin{verbatim} + >>> ' spacious '.lstrip() + 'spacious ' + >>> 'www.example.com'.lstrip('cmowz.') + 'example.com' +\end{verbatim} \versionchanged[Support for the \var{chars} argument]{2.2.2} \end{methoddesc} @@ -738,16 +751,24 @@ Return a list of the words in the string, using \var{sep} as the delimiter string. If \var{maxsplit} is given, at most \var{maxsplit} splits are done, the \emph{rightmost} ones. If \var{sep} is not specified -or \code{None}, any whitespace string is a separator. +or \code{None}, any whitespace string is a separator. Except for splitting +from the right, \method{rsplit()} behaves like \method{split()} which +is described in detail below. \versionadded{2.4} \end{methoddesc} \begin{methoddesc}[string]{rstrip}{\optional{chars}} -Return a copy of the string with trailing characters removed. If -\var{chars} is omitted or \code{None}, whitespace characters are -removed. If given and not \code{None}, \var{chars} must be a string; -the characters in the string will be stripped from the end of the -string this method is called on. +Return a copy of the string with trailing characters removed. The +\var{chars} argument is a string specifying the set of characters +to be removed. 
If omitted or \code{None}, the \var{chars} argument +defaults to removing whitespace. The \var{chars} argument is not +a suffix; rather, all combinations of its values are stripped: +\begin{verbatim} + >>> ' spacious '.rstrip() + ' spacious' + >>> 'mississippi'.rstrip('ipz') + 'mississ' +\end{verbatim} \versionchanged[Support for the \var{chars} argument]{2.2.2} \end{methoddesc} @@ -755,21 +776,23 @@ Return a list of the words in the string, using \var{sep} as the delimiter string. If \var{maxsplit} is given, at most \var{maxsplit} splits are done. (thus, the list will have at most \code{\var{maxsplit}+1} -elements). If \var{maxsplit} is not specified or is zero, then there +elements). If \var{maxsplit} is not specified, then there is no limit on the number of splits (all possible splits are made). Consecutive delimiters are not grouped together and are deemed to delimit empty strings (for example, \samp{'1,,2'.split(',')} returns \samp{['1', '', '2']}). The \var{sep} argument may consist of multiple characters (for example, \samp{'1, 2, 3'.split(', ')} returns \samp{['1', '2', '3']}). Splitting an empty string with a specified -separator returns an empty list. +separator returns \samp{['']}. If \var{sep} is not specified or is \code{None}, a different splitting -algorithm is applied. Words are separated by arbitrary length strings of -whitespace characters (spaces, tabs, newlines, returns, and formfeeds). -Consecutive whitespace delimiters are treated as a single delimiter -(\samp{'1 2 3'.split()} returns \samp{['1', '2', '3']}). Splitting an -empty string returns \samp{['']}. +algorithm is applied. First, whitespace characters (spaces, tabs, +newlines, returns, and formfeeds) are stripped from both ends. Then, +words are separated by arbitrary length strings of whitespace +characters. Consecutive whitespace delimiters are treated as a single +delimiter (\samp{'1 2 3'.split()} returns \samp{['1', '2', '3']}). 
+Splitting an empty string or a string consisting of just whitespace +returns an empty list. \end{methoddesc} \begin{methoddesc}[string]{splitlines}{\optional{keepends}} @@ -787,11 +810,17 @@ \end{methoddesc} \begin{methoddesc}[string]{strip}{\optional{chars}} -Return a copy of the string with leading and trailing characters -removed. If \var{chars} is omitted or \code{None}, whitespace -characters are removed. If given and not \code{None}, \var{chars} -must be a string; the characters in the string will be stripped from -the both ends of the string this method is called on. +Return a copy of the string with the leading and trailing characters +removed. The \var{chars} argument is a string specifying the set of +characters to be removed. If omitted or \code{None}, the \var{chars} +argument defaults to removing whitespace. The \var{chars} argument is not +a prefix or suffix; rather, all combinations of its values are stripped: +\begin{verbatim} + >>> ' spacious '.strip() + 'spacious' + >>> 'www.example.com'.strip('cmowz.') + 'example' +\end{verbatim} \versionchanged[Support for the \var{chars} argument]{2.2.2} \end{methoddesc} @@ -1071,7 +1100,7 @@ no longer works in Python 2.0. Use of this misfeature has been deprecated since Python 1.4. -\item[(3)] Raises an exception when \var{x} is not a list object. +\item[(3)] \var{x} can be any iterable object. \item[(4)] Raises \exception{ValueError} when \var{x} is not found in \var{s}. When a negative index is passed as the second or third parameter @@ -1135,7 +1164,9 @@ that the list has been mutated during a sort. \end{description} -\subsection{Set Types \label{types-set}} +\subsection{Set Types --- + \class{set}, \class{frozenset} + \label{types-set}} \obindex{set} A \dfn{set} object is an unordered collection of immutable values. @@ -1216,6 +1247,9 @@ Since sets only define partial ordering (subset relationships), the output of the \method{list.sort()} method is undefined for lists of sets. 
+Set elements are like dictionary keys; they need to define both +\method{__hash__} and \method{__eq__} methods. + Binary operations that mix \class{set} instances with \class{frozenset} return the type of the first operand. For example: \samp{frozenset('ab') | set('bc')} returns an instance of \class{frozenset}. @@ -1257,8 +1291,18 @@ \method{symmetric_difference_update()} methods will accept any iterable as an argument. +The design of the set types was based on lessons learned from the +\module{sets} module. + +\begin{seealso} + \seelink{comparison-to-builtin-set.html} + {Comparison to the built-in set types} + {Differences between the \module{sets} module and the + built-in set types.} +\end{seealso} + -\subsection{Mapping Types \label{typesmapping}} +\subsection{Mapping Types --- \class{dict} \label{typesmapping}} \obindex{mapping} \obindex{dictionary} @@ -1391,7 +1435,8 @@ the dictionary as the value of \var{k}. \var{x} defaults to \var{None}. \item[(6)] \function{popitem()} is useful to destructively iterate -over a dictionary, as often used in set algorithms. +over a dictionary, as often used in set algorithms. If the dictionary +is empty, calling \function{popitem()} raises a \exception{KeyError}. \item[(7)] \function{fromkeys()} is a class method that returns a new dictionary. \var{value} defaults to \code{None}. \versionadded{2.3} Index: libstdwin.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libstdwin.tex,v retrieving revision 1.25.10.1 retrieving revision 1.25.10.2 diff -u -d -r1.25.10.1 -r1.25.10.2 --- libstdwin.tex 7 Jan 2005 06:57:28 -0000 1.25.10.1 +++ libstdwin.tex 16 Oct 2005 05:23:58 -0000 1.25.10.2 @@ -114,7 +114,7 @@ \end{funcdesc} \begin{funcdesc}{getscrmm}{} -Return the screen size in millimeters. +Return the screen size in millimetres. 
\end{funcdesc} \begin{funcdesc}{fetchcolor}{colorname} Index: libstringio.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libstringio.tex,v retrieving revision 1.6.20.2 retrieving revision 1.6.20.3 diff -u -d -r1.6.20.2 -r1.6.20.3 --- libstringio.tex 7 Jan 2005 06:57:28 -0000 1.6.20.2 +++ libstringio.tex 16 Oct 2005 05:23:58 -0000 1.6.20.3 @@ -14,6 +14,7 @@ When a \class{StringIO} object is created, it can be initialized to an existing string by passing the string to the constructor. If no string is given, the \class{StringIO} will start empty. +In both cases, the initial file position starts at zero. The \class{StringIO} object can accept either Unicode or 8-bit strings, but mixing the two may take some care. If both are used, Index: libsubprocess.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libsubprocess.tex,v retrieving revision 1.5.2.1 retrieving revision 1.5.2.2 diff -u -d -r1.5.2.1 -r1.5.2.2 --- libsubprocess.tex 7 Jan 2005 06:57:28 -0000 1.5.2.1 +++ libsubprocess.tex 16 Oct 2005 05:23:58 -0000 1.5.2.2 @@ -189,7 +189,7 @@ \begin{methoddesc}{communicate}{input=None} Interact with process: Send data to stdin. Read data from stdout and stderr, until end-of-file is reached. Wait for process to terminate. -The optional \var{stdin} argument should be a string to be sent to the +The optional \var{input} argument should be a string to be sent to the child process, or \code{None}, if no data should be sent to the child. communicate() returns a tuple (stdout, stderr). 
@@ -374,7 +374,7 @@ \begin{verbatim} (child_stdout, child_stdin) = popen2.popen2("somestring", bufsize, mode) ==> -p = Popen(["somestring"], shell=True, bufsize=bufsize +p = Popen(["somestring"], shell=True, bufsize=bufsize, stdin=PIPE, stdout=PIPE, close_fds=True) (child_stdout, child_stdin) = (p.stdout, p.stdin) \end{verbatim} Index: libtarfile.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libtarfile.tex,v retrieving revision 1.1.6.2 retrieving revision 1.1.6.3 diff -u -d -r1.1.6.2 -r1.1.6.3 --- libtarfile.tex 7 Jan 2005 06:57:28 -0000 1.1.6.2 +++ libtarfile.tex 16 Oct 2005 05:23:58 -0000 1.1.6.3 @@ -32,7 +32,7 @@ it defaults to \code{'r'}. Here is a full list of mode combinations: \begin{tableii}{c|l}{code}{mode}{action} - \lineii{'r'}{Open for reading with transparent compression (recommended).} + \lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).} \lineii{'r:'}{Open for reading exclusively without compression.} \lineii{'r:gz'}{Open for reading with gzip compression.} \lineii{'r:bz2'}{Open for reading with bzip2 compression.} @@ -65,6 +65,7 @@ (section~\ref{tar-examples}). 
The currently possible modes: \begin{tableii}{c|l}{code}{Mode}{Action} + \lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.} \lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.} \lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.} \lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.} @@ -127,8 +128,8 @@ \seemodule{zipfile}{Documentation of the \refmodule{zipfile} standard module.} - \seetitle[http://www.gnu.org/manual/tar/html_chapter/tar_8.html\#SEC118] - {GNU tar manual, Standard Section}{Documentation for tar archive files, + \seetitle[http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html\#SEC134] + {GNU tar manual, Basic Tar Format}{Documentation for tar archive files, including GNU tar extensions.} \end{seealso} @@ -196,12 +197,29 @@ more available. \end{methoddesc} +\begin{methoddesc}{extractall}{\optional{path\optional{, members}}} + Extract all members from the archive to the current working directory + or directory \var{path}. If optional \var{members} is given, it must be + a subset of the list returned by \method{getmembers()}. + Directory information like owner, modification time and permissions are + set after all members have been extracted. This is done to work around two + problems: A directory's modification time is reset each time a file is + created in it. And, if a directory's permissions do not allow writing, + extracting files to it will fail. + \versionadded{2.5} +\end{methoddesc} + \begin{methoddesc}{extract}{member\optional{, path}} Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. \var{member} may be a filename or a \class{TarInfo} object. You can specify a different directory using \var{path}.
+ \begin{notice} + Because the \method{extract()} method allows random access to a tar + archive there are some issues you must take care of yourself. See the + description for \method{extractall()} above. + \end{notice} \end{methoddesc} \begin{methoddesc}{extractfile}{member} @@ -281,7 +299,7 @@ \begin{memberdesc}{debug=0} To be set from \code{0} (no debug messages; the default) up to \code{3} (all debug messages). The messages are written to - \code{sys.stdout}. + \code{sys.stderr}. \end{memberdesc} \begin{memberdesc}{errorlevel} @@ -416,6 +434,14 @@ \subsection{Examples \label{tar-examples}} +How to extract an entire tar archive to the current working directory: +\begin{verbatim} +import tarfile +tar = tarfile.open("sample.tar.gz") +tar.extractall() +tar.close() +\end{verbatim} + How to create an uncompressed tar archive from a list of filenames: \begin{verbatim} import tarfile Index: libtempfile.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libtempfile.tex,v retrieving revision 1.17.8.2 retrieving revision 1.17.8.3 diff -u -d -r1.17.8.2 -r1.17.8.3 --- libtempfile.tex 7 Jan 2005 06:57:28 -0000 1.17.8.2 +++ libtempfile.tex 16 Oct 2005 05:23:58 -0000 1.17.8.3 @@ -146,7 +146,6 @@ \item The directory named by the \envvar{TMP} environment variable. \item A platform-specific location: \begin{itemize} - \item On Macintosh, the \file{Temporary Items} folder. \item On RiscOS, the directory named by the \envvar{Wimp\$ScrapDir} environment variable. 
\item On Windows, the directories Index: libthread.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libthread.tex,v retrieving revision 1.25.2.2 retrieving revision 1.25.2.3 diff -u -d -r1.25.2.2 -r1.25.2.3 --- libthread.tex 7 Jan 2005 06:57:29 -0000 1.25.2.2 +++ libthread.tex 16 Oct 2005 05:23:58 -0000 1.25.2.3 @@ -81,11 +81,11 @@ Without the optional argument, this method acquires the lock unconditionally, if necessary waiting until it is released by another thread (only one thread at a time can acquire a lock --- that's their -reason for existence), and returns \code{None}. If the integer +reason for existence). If the integer \var{waitflag} argument is present, the action depends on its value: if it is zero, the lock is only acquired if it can be acquired immediately without waiting, while if it is nonzero, the lock is -acquired unconditionally as before. If an argument is present, the +acquired unconditionally as before. The return value is \code{True} if the lock is acquired successfully, \code{False} if not. \end{methoddesc} Index: libthreading.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libthreading.tex,v retrieving revision 1.12.2.2 retrieving revision 1.12.2.3 diff -u -d -r1.12.2.2 -r1.12.2.3 --- libthreading.tex 7 Jan 2005 06:57:29 -0000 1.12.2.2 +++ libthreading.tex 16 Oct 2005 05:23:58 -0000 1.12.2.3 @@ -167,8 +167,7 @@ Acquire a lock, blocking or non-blocking. When invoked without arguments, block until the lock is -unlocked, then set it to locked, and return. There is no -return value in this case. +unlocked, then set it to locked, and return true. When invoked with the \var{blocking} argument set to true, do the same thing as when called without arguments, and return true. 
@@ -595,7 +594,12 @@ When the \var{timeout} argument is present and not \code{None}, it should be a floating point number specifying a timeout for the -operation in seconds (or fractions thereof). +operation in seconds (or fractions thereof). As \method{join()} always +returns \code{None}, you must call \method{isAlive()} to decide whether +a timeout happened. + +When the \var{timeout} argument is not present or \code{None}, the +operation will block until the thread terminates. A thread can be \method{join()}ed many times. Index: libtokenize.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libtokenize.tex,v retrieving revision 1.5 retrieving revision 1.5.20.1 diff -u -d -r1.5 -r1.5.20.1 --- libtokenize.tex 29 Jun 2001 23:51:07 -0000 1.5 +++ libtokenize.tex 16 Oct 2005 05:23:58 -0000 1.5.20.1 @@ -45,6 +45,9 @@ provides the same interface as the \method{readline()} method of built-in file objects (see section~\ref{bltin-file-objects}). Each call to the function should return one line of input as a string. + Alternately, \var{readline} may be a callable object that signals + completion by raising \exception{StopIteration}. + \versionchanged[Added StopIteration support]{2.5} The second parameter, \var{tokeneater}, must also be a callable object. It is called once for each token, with five arguments, @@ -65,3 +68,52 @@ are generated when a logical line of code is continued over multiple physical lines. \end{datadesc} + +Another function is provided to reverse the tokenization process. +This is useful for creating tools that tokenize a script, modify +the token stream, and write back the modified script. + +\begin{funcdesc}{untokenize}{iterable} + Converts tokens back into Python source code. The \var{iterable} + must return sequences with at least two elements, the token type and + the token string. Any additional sequence elements are ignored. 
+ + The reconstructed script is returned as a single string. The + result is guaranteed to tokenize back to match the input so that + the conversion is lossless and round-trips are assured. The + guarantee applies only to the token type and token string as + the spacing between tokens (column positions) may change. + \versionadded{2.5} +\end{funcdesc} + +Example of a script re-writer that transforms float literals into +Decimal objects: +\begin{verbatim} +def decistmt(s): + """Substitute Decimals for floats in a string of statements. + + >>> from decimal import Decimal + >>> s = 'print +21.3e-5*-.1234/81.7' + >>> decistmt(s) + "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" + + >>> exec(s) + -3.21716034272e-007 + >>> exec(decistmt(s)) + -3.217160342717258261933904529E-7 + + """ + result = [] + g = generate_tokens(StringIO(s).readline) # tokenize the string + for toknum, tokval, _, _, _ in g: + if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens + result.extend([ + (NAME, 'Decimal'), + (OP, '('), + (STRING, repr(tokval)), + (OP, ')') + ]) + else: + result.append((toknum, tokval)) + return untokenize(result) +\end{verbatim} Index: libunicodedata.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libunicodedata.tex,v retrieving revision 1.3.24.2 retrieving revision 1.3.24.3 diff -u -d -r1.3.24.2 -r1.3.24.3 --- libunicodedata.tex 7 Jan 2005 06:57:29 -0000 1.3.24.2 +++ libunicodedata.tex 16 Oct 2005 05:23:58 -0000 1.3.24.3 @@ -18,7 +18,7 @@ The module uses the same names and symbols as defined by the UnicodeData File Format 3.2.0 (see -\url{http://www.unicode.org/Public/UNIDATA/UnicodeData.html}). It +\url{http://www.unicode.org/Public/3.2-Update/UnicodeData-3.2.0.html}). 
It defines the following functions: \begin{funcdesc}{lookup}{name} Index: liburllib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/liburllib.tex,v retrieving revision 1.43.2.2 retrieving revision 1.43.2.3 diff -u -d -r1.43.2.2 -r1.43.2.3 --- liburllib.tex 7 Jan 2005 06:57:29 -0000 1.43.2.2 +++ liburllib.tex 16 Oct 2005 05:23:58 -0000 1.43.2.3 @@ -97,7 +97,7 @@ \begin{verbatim} # Use http://www.someproxy.com:3128 for http proxying -proxies = proxies={'http': 'http://www.someproxy.com:3128'} +proxies = {'http': 'http://www.someproxy.com:3128'} filehandle = urllib.urlopen(some_url, proxies=proxies) # Don't use any proxies filehandle = urllib.urlopen(some_url, proxies={}) @@ -142,6 +142,25 @@ (normally the request type is \code{GET}). The \var{data} argument must in standard \mimetype{application/x-www-form-urlencoded} format; see the \function{urlencode()} function below. + +\versionchanged[ +\function{urlretrieve()} will raise \exception{ContentTooShortError} +when it detects that the amount of data available +was less than the expected amount (which is the size reported by a +\var{Content-Length} header). This can occur, for example, when the +download is interrupted. + +The \var{Content-Length} is treated as a lower bound: if there's more data +to read, urlretrieve reads more data, but if less data is available, +it raises the exception. + +You can still retrieve the downloaded data in this case, it is stored +in the \member{content} attribute of the exception instance. + +If no \var{Content-Length} header was supplied, urlretrieve can +not check the size of the data it has downloaded, and just returns it. 
+In this case you just have to assume that the download was successful]{2.5} + \end{funcdesc} \begin{datadesc}{_urlopener} @@ -160,9 +179,7 @@ import urllib class AppURLopener(urllib.FancyURLopener): - def __init__(self, *args): - self.version = "App/1.7" - urllib.FancyURLopener.__init__(self, *args) + version = "App/1.7" urllib._urlopener = AppURLopener() \end{verbatim} @@ -243,9 +260,9 @@ \mailheader{User-Agent} header of \samp{urllib/\var{VVV}}, where \var{VVV} is the \module{urllib} version number. Applications can define their own \mailheader{User-Agent} header by subclassing -\class{URLopener} or \class{FancyURLopener} and setting the instance -attribute \member{version} to an appropriate string value before the -\method{open()} method is called. +\class{URLopener} or \class{FancyURLopener} and setting the class +attribute \member{version} to an appropriate string value in the +subclass definition. The optional \var{proxies} parameter should be a dictionary mapping scheme names to proxy URLs, where an empty dictionary turns proxies @@ -285,6 +302,15 @@ if needed.} \end{classdesc} +\begin{excclassdesc}{ContentTooShortError}{msg\optional{, content}} +This exception is raised when the \function{urlretrieve()} function +detects that the amount of the downloaded data is less than the +expected amount (given by the \var{Content-Length} header). The +\member{content} attribute stores the downloaded (and supposedly +truncated) data. +\versionadded{2.5} +\end{excclassdesc} + Restrictions: \begin{itemize} @@ -319,7 +345,7 @@ \item The data returned by \function{urlopen()} or \function{urlretrieve()} is the raw data returned by the server. This may be binary data -(e.g. an image), plain text or (for example) HTML\index{HTML}. The +(such as an image), plain text or (for example) HTML\index{HTML}. The HTTP\indexii{HTTP}{protocol} protocol provides type information in the reply header, which can be inspected by looking at the \mailheader{Content-Type} header. 
For the Index: liburllib2.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/liburllib2.tex,v retrieving revision 1.6.10.2 retrieving revision 1.6.10.3 diff -u -d -r1.6.10.2 -r1.6.10.3 --- liburllib2.tex 7 Jan 2005 06:57:29 -0000 1.6.10.2 +++ liburllib2.tex 16 Oct 2005 05:23:58 -0000 1.6.10.3 @@ -71,6 +71,15 @@ \end{funcdesc} +The following attribute is defined: + +\begin{datadesc}{httpresponses} +A mapping between HTTP status codes and the W3C names. + +Example: \code{urllib2.httpresponses[404]} is \code{'Not Found'}. +\versionadded{2.5} +\end{datadesc} + The following exceptions are raised as appropriate: \begin{excdesc}{URLError} @@ -254,8 +263,8 @@ \begin{methoddesc}[Request]{get_method}{} Return a string indicating the HTTP request method. This is only -meaningful for HTTP requests, and currently always takes one of the -values ("GET", "POST"). +meaningful for HTTP requests, and currently always returns +\code{'GET'} or \code{'POST'}. \end{methoddesc} \begin{methoddesc}[Request]{has_data}{} Index: liburlparse.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/liburlparse.tex,v retrieving revision 1.20.8.1 retrieving revision 1.20.8.2 diff -u -d -r1.20.8.1 -r1.20.8.2 --- liburlparse.tex 28 Apr 2003 17:34:09 -0000 1.20.8.1 +++ liburlparse.tex 16 Oct 2005 05:23:58 -0000 1.20.8.2 @@ -69,9 +69,9 @@ params from the URL. This should generally be used instead of \function{urlparse()} if the more recent URL syntax allowing parameters to be applied to each segment of the \var{path} portion of -the URL (see \rfc{2396}). A separate function is needed to separate -the path segments and parameters. This function returns a 5-tuple: -(addressing scheme, network location, path, query, fragment +the URL (see \rfc{2396}) is wanted. A separate function is needed to +separate the path segments and parameters. 
This function returns a +5-tuple: (addressing scheme, network location, path, query, fragment identifier). \versionadded{2.2} \end{funcdesc} Index: libuserdict.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libuserdict.tex,v retrieving revision 1.21.2.2 retrieving revision 1.21.2.3 diff -u -d -r1.21.2.2 -r1.21.2.3 --- libuserdict.tex 7 Jan 2005 06:57:29 -0000 1.21.2.2 +++ libuserdict.tex 16 Oct 2005 05:23:58 -0000 1.21.2.3 @@ -21,18 +21,24 @@ The \module{UserDict} module defines the \class{UserDict} class and \class{DictMixin}: -\begin{classdesc}{UserDict}{\optional{initialdata}} -Class that simulates a dictionary. The instance's -contents are kept in a regular dictionary, which is accessible via the -\member{data} attribute of \class{UserDict} instances. If -\var{initialdata} is provided, \member{data} is initialized with its -contents; note that a reference to \var{initialdata} will not be kept, -allowing it be used for other purposes. +\begin{classdesc}{UserDict}{\optional{initialdata}} +Class that simulates a dictionary. The instance's contents are kept +in a regular dictionary, which is accessible via the \member{data} +attribute of \class{UserDict} instances. If \var{initialdata} is +provided, \member{data} is initialized with its contents; note that a +reference to \var{initialdata} will not be kept, allowing it to be used +for other purposes. \note{For backward compatibility, instances of +\class{UserDict} are not iterable.} +\end{classdesc} + +\begin{classdesc}{IterableUserDict}{\optional{initialdata}} +Subclass of \class{UserDict} that supports direct iteration (e.g. +\code{for key in myDict}).
\end{classdesc} In addition to supporting the methods and operations of mappings (see -section \ref{typesmapping}), \class{UserDict} instances provide the -following attribute: +section \ref{typesmapping}), \class{UserDict} and +\class{IterableUserDict} instances provide the following attribute: \begin{memberdesc}{data} A real dictionary used to store the contents of the \class{UserDict} Index: libuu.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libuu.tex,v retrieving revision 1.11.18.2 retrieving revision 1.11.18.3 diff -u -d -r1.11.18.2 -r1.11.18.3 --- libuu.tex 7 Jan 2005 06:57:29 -0000 1.11.18.2 +++ libuu.tex 16 Oct 2005 05:23:58 -0000 1.11.18.3 @@ -31,13 +31,18 @@ respectively. \end{funcdesc} -\begin{funcdesc}{decode}{in_file\optional{, out_file\optional{, mode}}} +\begin{funcdesc}{decode}{in_file\optional{, out_file\optional{, mode\optional{, quiet}}}} This call decodes uuencoded file \var{in_file} placing the result on file \var{out_file}. If \var{out_file} is a pathname, \var{mode} is used to set the permission bits if the file must be created. Defaults for \var{out_file} and \var{mode} are taken from the uuencode header. However, if the file specified in the header already exists, a \exception{uu.Error} is raised. + + \function{decode()} may print a warning to standard error if the + input was produced by an incorrect uuencoder and Python could + recover from that error. Setting \var{quiet} to a true value + silences this warning. 
\end{funcdesc} \begin{excclassdesc}{Error}{} Index: libweakref.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libweakref.tex,v retrieving revision 1.17.8.2 retrieving revision 1.17.8.3 diff -u -d -r1.17.8.2 -r1.17.8.3 --- libweakref.tex 7 Jan 2005 06:57:29 -0000 1.17.8.2 +++ libweakref.tex 16 Oct 2005 05:23:58 -0000 1.17.8.3 @@ -10,6 +10,8 @@ \versionadded{2.1} +% When making changes to the examples in this file, be sure to update +% Lib/test/test_weakref.py::libreftest too! The \module{weakref} module allows the Python programmer to create \dfn{weak references} to objects. @@ -228,7 +230,7 @@ o = r() if o is None: # referent has been garbage collected - print "Object has been allocated; can't frobnicate." + print "Object has been deallocated; can't frobnicate." else: print "Object is still live!" o.do_something_useful() @@ -255,20 +257,17 @@ import weakref class ExtendedRef(weakref.ref): - def __new__(cls, ob, callback=None, **annotations): - weakref.ref.__new__(cls, ob, callback) - self.__counter = 0 - def __init__(self, ob, callback=None, **annotations): super(ExtendedRef, self).__init__(ob, callback) - for k, v in annotations: + self.__counter = 0 + for k, v in annotations.iteritems(): setattr(self, k, v) def __call__(self): """Return a pair containing the referent and the number of times the reference has been called. """ - ob = super(ExtendedRef, self)() + ob = super(ExtendedRef, self).__call__() if ob is not None: self.__counter += 1 ob = (ob, self.__counter) Index: libwebbrowser.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libwebbrowser.tex,v retrieving revision 1.10 retrieving revision 1.10.18.1 diff -u -d -r1.10 -r1.10.18.1 --- libwebbrowser.tex 19 Jul 2001 03:49:33 -0000 1.10 +++ libwebbrowser.tex 16 Oct 2005 05:23:58 -0000 1.10.18.1 @@ -6,30 +6,36 @@ \moduleauthor{Fred L. 
Drake, Jr.}{fdrake at acm.org} \sectionauthor{Fred L. Drake, Jr.}{fdrake at acm.org} -The \module{webbrowser} module provides a very high-level interface to -allow displaying Web-based documents to users. The controller objects -are easy to use and are platform-independent. Under most +The \module{webbrowser} module provides a high-level interface to +allow displaying Web-based documents to users. Under most circumstances, simply calling the \function{open()} function from this module will do the right thing. -Under \UNIX, graphical browsers are preferred under X11, but text-mode +Under \UNIX{}, graphical browsers are preferred under X11, but text-mode browsers will be used if graphical browsers are not available or an X11 display isn't available. If text-mode browsers are used, the calling process will block until the user exits the browser. -Under \UNIX, if the environment variable \envvar{BROWSER} exists, it +If the environment variable \envvar{BROWSER} exists, it is interpreted to override the platform default list of browsers, as a -colon-separated list of browsers to try in order. When the value of +os.pathsep-separated list of browsers to try in order. When the value of a list part contains the string \code{\%s}, then it is interpreted as a literal browser command line to be used with the argument URL substituted for the \code{\%s}; if the part does not contain \code{\%s}, it is simply interpreted as the name of the browser to launch. -For non-\UNIX{} platforms, or when X11 browsers are available on -\UNIX, the controlling process will not wait for the user to finish -with the browser, but allow the browser to maintain its own window on -the display. +For non-\UNIX{} platforms, or when a remote browser is available on +\UNIX{}, the controlling process will not wait for the user to finish +with the browser, but allow the remote browser to maintain its own +windows on the display. 
If remote browsers are not available on \UNIX{}, +the controlling process will launch a new browser and wait. + +The script \program{webbrowser} can be used as a command-line interface +for the module. It accepts an URL as the argument. It accepts the following +optional parameters: \programopt{-n} opens the URL in a new browser window, +if possible; \programopt{-t} opens the URL in a new browser page ("tab"). The +options are, naturally, mutually exclusive. The following exception is defined: @@ -39,11 +45,14 @@ The following functions are defined: -\begin{funcdesc}{open}{url\optional{, new=0}\optional{, autoraise=1}} - Display \var{url} using the default browser. If \var{new} is true, - a new browser window is opened if possible. If \var{autoraise} is +\begin{funcdesc}{open}{url\optional{, new=0\optional{, autoraise=1}}} + Display \var{url} using the default browser. If \var{new} is 0, the + \var{url} is opened in the same browser window. If \var{new} is 1, + a new browser window is opened if possible. If \var{new} is 2, + a new browser page ("tab") is opened if possible. If \var{autoraise} is true, the window is raised if possible (note that under many window managers this will occur regardless of the setting of this variable). + \end{funcdesc} \begin{funcdesc}{open_new}{url} @@ -51,6 +60,12 @@ otherwise, open \var{url} in the only browser window. \end{funcdesc} +\begin{funcdesc}{open_new_tab}{url} + Open \var{url} in a new page ("tab") of the default browser, if possible, + otherwise equivalent to \function{open_new}. +\versionadded{2.5} +\end{funcdesc} + \begin{funcdesc}{get}{\optional{name}} Return a controller object for the browser type \var{name}. If \var{name} is empty, return a controller for a default browser @@ -67,7 +82,7 @@ This entry point is only useful if you plan to either set the \envvar{BROWSER} variable or call \function{get} with a nonempty - argument matching the name of a handler you declare. 
+ argument matching the name of a handler you declare. \end{funcdesc} A number of browser types are predefined. This table gives the type @@ -76,16 +91,24 @@ in this module. \begin{tableiii}{l|l|c}{code}{Type Name}{Class Name}{Notes} - \lineiii{'mozilla'}{\class{Netscape('mozilla')}}{} - \lineiii{'netscape'}{\class{Netscape('netscape')}}{} - \lineiii{'mosaic'}{\class{GenericBrowser('mosaic \%s \&')}}{} + \lineiii{'mozilla'}{\class{Mozilla('mozilla')}}{} + \lineiii{'firefox'}{\class{Mozilla('mozilla')}}{} + \lineiii{'netscape'}{\class{Mozilla('netscape')}}{} + \lineiii{'galeon'}{\class{Galeon('galeon')}}{} + \lineiii{'epiphany'}{\class{Galeon('epiphany')}}{} + \lineiii{'skipstone'}{\class{GenericBrowser('skipstone \%s \&')}}{} + \lineiii{'konqueror'}{\class{Konqueror()}}{(1)} \lineiii{'kfm'}{\class{Konqueror()}}{(1)} + \lineiii{'mosaic'}{\class{GenericBrowser('mosaic \%s \&')}}{} + \lineiii{'opera'}{\class{Opera()}}{} \lineiii{'grail'}{\class{Grail()}}{} \lineiii{'links'}{\class{GenericBrowser('links \%s')}}{} + \lineiii{'elinks'}{\class{Elinks('elinks')}}{} \lineiii{'lynx'}{\class{GenericBrowser('lynx \%s')}}{} \lineiii{'w3m'}{\class{GenericBrowser('w3m \%s')}}{} \lineiii{'windows-default'}{\class{WindowsDefault}}{(2)} \lineiii{'internet-config'}{\class{InternetConfig}}{(3)} + \lineiii{'macosx'}{\class{MacOSX('default')}}{(4)} \end{tableiii} \noindent @@ -94,20 +117,22 @@ \begin{description} \item[(1)] ``Konqueror'' is the file manager for the KDE desktop environment for -UNIX, and only makes sense to use if KDE is running. Some way of +\UNIX{}, and only makes sense to use if KDE is running. Some way of reliably detecting KDE would be nice; the \envvar{KDEDIR} variable is not sufficient. Note also that the name ``kfm'' is used even when using the \program{konqueror} command with KDE 2 --- the implementation selects the best strategy for running Konqueror. 
\item[(2)] -Only on Windows platforms; requires the common -extension modules \module{win32api} and \module{win32con}. +Only on Windows platforms. \item[(3)] Only on MacOS platforms; requires the standard MacPython \module{ic} module, described in the \citetitle[../mac/module-ic.html]{Macintosh Library Modules} manual. + +\item[(4)] +Only on MacOS X platform. \end{description} @@ -116,13 +141,20 @@ Browser controllers provide two methods which parallel two of the module-level convenience functions: -\begin{funcdesc}{open}{url\optional{, new}} - Display \var{url} using the browser handled by this controller. If - \var{new} is true, a new browser window is opened if possible. +\begin{funcdesc}{open}{url\optional{, new\optional{, autoraise=1}}} + Display \var{url} using the browser handled by this controller. + If \var{new} is 1, a new browser window is opened if possible. + If \var{new} is 2, a new browser page ("tab") is opened if possible. \end{funcdesc} \begin{funcdesc}{open_new}{url} Open \var{url} in a new window of the browser handled by this controller, if possible, otherwise, open \var{url} in the only - browser window. + browser window. Alias \function{open_new}. +\end{funcdesc} + +\begin{funcdesc}{open_new_tab}{url} + Open \var{url} in a new page ("tab") of the browser handled by this + controller, if possible, otherwise equivalent to \function{open_new}. +\versionadded{2.5} \end{funcdesc} Index: libxmlrpclib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libxmlrpclib.tex,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- libxmlrpclib.tex 7 Jan 2005 06:57:29 -0000 1.9.2.2 +++ libxmlrpclib.tex 16 Oct 2005 05:23:58 -0000 1.9.2.3 @@ -5,7 +5,7 @@ \moduleauthor{Fredrik Lundh}{fredrik at pythonware.com} \sectionauthor{Eric S. Raymond}{esr at snark.thyrsus.com} -% Not everyting is documented yet. 
It might be good to describe +% Not everything is documented yet. It might be good to describe % Marshaller, Unmarshaller, getparser, dumps, loads, and Transport. \versionadded{2.2} @@ -19,7 +19,7 @@ \begin{classdesc}{ServerProxy}{uri\optional{, transport\optional{, encoding\optional{, verbose\optional{, - allow_none}}}}} + allow_none\optional{, use_datetime}}}}}} A \class{ServerProxy} instance is an object that manages communication with a remote XML-RPC server. The required first argument is a URI (Uniform Resource Indicator), and will normally be the URL of the @@ -32,7 +32,14 @@ default behaviour is for \code{None} to raise a \exception{TypeError}. This is a commonly-used extension to the XML-RPC specification, but isn't supported by all clients and servers; see -\url{http://ontosys.com/xml-rpc/extensions.html} for a description. +\url{http://ontosys.com/xml-rpc/extensions.php} for a description. +The \var{use_datetime} flag can be used to cause date/time values to be +presented as \class{\refmodule{datetime}.datetime} objects; this is false +by default. \class{\refmodule{datetime}.datetime}, +\class{\refmodule{datetime}.date} and \class{\refmodule{datetime}.time} +objects may be passed to calls. \class{\refmodule{datetime}.date} objects +are converted with a time of ``00:00:00''. +\class{\refmodule{datetime}.time} objects are converted using today's date. Both the HTTP and HTTPS transports support the URL syntax extension for HTTP Basic Authentication: \code{http://user:pass at host:port/path}. The @@ -62,8 +69,11 @@ elements. Arrays are returned as lists} \lineii{structures}{A Python dictionary. 
Keys must be strings, values may be any conformable type.} - \lineii{dates}{in seconds since the epoch; pass in an instance of the - \class{DateTime} wrapper class} + \lineii{dates}{in seconds since the epoch (pass in an instance of the + \class{DateTime} class) or a + \class{\refmodule{datetime}.datetime}, + \class{\refmodule{datetime}.date} or + \class{\refmodule{datetime}.time} instance} \lineii{binary data}{pass in an instance of the \class{Binary} wrapper class} \end{tableii} @@ -87,6 +97,7 @@ \class{Server} is retained as an alias for \class{ServerProxy} for backwards compatibility. New code should use \class{ServerProxy}. +\versionchanged[The \var{use_datetime} flag was added]{2.5} \end{classdesc} @@ -96,7 +107,7 @@ client software in several languages. Contains pretty much everything an XML-RPC client developer needs to know.} \seetitle[http://xmlrpc-c.sourceforge.net/hacks.php] - {XML-RPC-Hacks page}{Extensions for various open-source + {XML-RPC Hacks page}{Extensions for various open-source libraries to support introspection and multicall.} \end{seealso} @@ -149,7 +160,8 @@ Introspection methods are currently supported by servers written in PHP, C and Microsoft .NET. Partial introspection support is included in recent updates to UserLand Frontier. Introspection support for -Perl, Python and Java is available at the XML-RPC Hacks page. +Perl, Python and Java is available at the \ulink{XML-RPC +Hacks}{http://xmlrpc-c.sourceforge.net/hacks.php} page. \subsection{Boolean Objects \label{boolean-objects}} @@ -170,21 +182,23 @@ \subsection{DateTime Objects \label{datetime-objects}} -This class may be initialized with seconds since the epoch, a -time tuple, or an ISO 8601 time/date string. 
It has the following -methods, supported mainly for internal use by the -marshalling/unmarshalling code: +This class may be initialized with seconds since the epoch, a time tuple, an +ISO 8601 time/date string, or a {}\class{\refmodule{datetime}.datetime}, +{}\class{\refmodule{datetime}.date} or {}\class{\refmodule{datetime}.time} +instance. It has the following methods, supported mainly for internal use +by the marshalling/unmarshalling code: \begin{methoddesc}{decode}{string} Accept a string as the instance's new time value. \end{methoddesc} \begin{methoddesc}{encode}{out} -Write the XML-RPC encoding of this DateTime item to the out stream object. +Write the XML-RPC encoding of this \class{DateTime} item to the +\var{out} stream object. \end{methoddesc} It also supports certain of Python's built-in operators through -\method{__cmp__} and \method{__repr__} methods. +\method{__cmp__()} and \method{__repr__()} methods. \subsection{Binary Objects \label{binary-objects}} @@ -296,7 +310,6 @@ \begin{funcdesc}{dumps}{params\optional{, methodname\optional{, methodresponse\optional{, encoding\optional{, allow_none}}}}} - Convert \var{params} into an XML-RPC request. or into a response if \var{methodresponse} is true. \var{params} can be either a tuple of arguments or an instance of the @@ -308,12 +321,21 @@ provide a true value for \var{allow_none}. \end{funcdesc} -\begin{funcdesc}{loads}{data} +\begin{funcdesc}{loads}{data\optional{, use_datetime}} Convert an XML-RPC request or response into Python objects, a \code{(\var{params}, \var{methodname})}. \var{params} is a tuple of argument; \var{methodname} is a string, or \code{None} if no method name is present in the packet. If the XML-RPC packet represents a fault condition, this function will raise a \exception{Fault} exception. +The \var{use_datetime} flag can be used to cause date/time values to be +presented as \class{\refmodule{datetime}.datetime} objects; this is false +by default. 
+Note that even if you call an XML-RPC method with +\class{\refmodule{datetime}.date} or \class{\refmodule{datetime}.time} +objects, they are converted to \class{DateTime} objects internally, so only +{}\class{\refmodule{datetime}.datetime} objects will be returned. + +\versionchanged[The \var{use_datetime} flag was added]{2.5} \end{funcdesc} Index: libzlib.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libzlib.tex,v retrieving revision 1.27.12.1 retrieving revision 1.27.12.2 diff -u -d -r1.27.12.1 -r1.27.12.2 --- libzlib.tex 7 Jan 2005 06:57:29 -0000 1.27.12.1 +++ libzlib.tex 16 Oct 2005 05:23:58 -0000 1.27.12.2 @@ -8,11 +8,16 @@ For applications that require data compression, the functions in this module allow compression and decompression, using the zlib library. -The zlib library has its own home page at \url{http://www.gzip.org/zlib/}. +The zlib library has its own home page at \url{http://www.zlib.net}. There are known incompatibilities between the Python module and versions of the zlib library earlier than 1.1.3; 1.1.3 has a security vulnerability, so we recommend using 1.1.4 or later. +zlib's functions have many options and often need to be used in a +particular order. This documentation doesn't attempt to cover all of +the permutations; consult the zlib manual at +\url{http://www.zlib.net/manual.html} for authoritative information. + The available exception and functions in this module are: \begin{excdesc}{error} @@ -110,8 +115,7 @@ compressed output is returned. \var{mode} can be selected from the constants \constant{Z_SYNC_FLUSH}, \constant{Z_FULL_FLUSH}, or \constant{Z_FINISH}, defaulting to \constant{Z_FINISH}. 
\constant{Z_SYNC_FLUSH} and -\constant{Z_FULL_FLUSH} allow compressing further strings of data and -are used to allow partial error recovery on decompression, while +\constant{Z_FULL_FLUSH} allow compressing further strings of data, while \constant{Z_FINISH} finishes the compressed stream and prevents compressing any more data. After calling \method{flush()} with \var{mode} set to \constant{Z_FINISH}, the @@ -171,5 +175,7 @@ \begin{seealso} \seemodule{gzip}{Reading and writing \program{gzip}-format files.} - \seeurl{http://www.gzip.org/zlib/}{The zlib library home page.} + \seeurl{http://www.zlib.net}{The zlib library home page.} + \seeurl{http://www.zlib.net/manual.html}{The zlib manual explains + the semantics and usage of the library's many functions.} \end{seealso} Index: tkinter.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/tkinter.tex,v retrieving revision 1.12.2.2 retrieving revision 1.12.2.3 diff -u -d -r1.12.2.2 -r1.12.2.3 --- tkinter.tex 7 Jan 2005 06:57:29 -0000 1.12.2.2 +++ tkinter.tex 16 Oct 2005 05:23:58 -0000 1.12.2.3 @@ -258,8 +258,10 @@ self.pack() self.createWidgets() -app = Application() +root = Tk() +app = Application(master=root) app.mainloop() +root.destroy() \end{verbatim} @@ -621,7 +623,6 @@ self.entrythingy = Entry() self.entrythingy.pack() - self.button.pack() # here is the application variable self.contents = StringVar() # set it to some value @@ -732,7 +733,7 @@ Screen distances can be specified in either pixels or absolute distances. Pixels are given as numbers and absolute distances as strings, with the trailing character denoting units: \code{c} -for centimeters, \code{i} for inches, \code{m} for millimeters, +for centimetres, \code{i} for inches, \code{m} for millimetres, \code{p} for printer's points. For example, 3.5 inches is expressed as \code{"3.5i"}. 
@@ -1354,7 +1355,7 @@ \item \ulink{Compound} -{http://tix.sourceforge.net/dist/current/man/html/TixCmd/compound.html} +{http://tix.sourceforge.net/dist/current/man/html/TixCmd/compound.htm} image types can be used to create images that consists of multiple horizontal lines; each line is composed of a series of items (texts, bitmaps, images or spaces) arranged from left to right. For example, a Index: xmldom.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/xmldom.tex,v retrieving revision 1.19.8.2 retrieving revision 1.19.8.3 diff -u -d -r1.19.8.2 -r1.19.8.3 --- xmldom.tex 7 Jan 2005 06:57:30 -0000 1.19.8.2 +++ xmldom.tex 16 Oct 2005 05:23:58 -0000 1.19.8.3 @@ -84,8 +84,8 @@ \seetitle[http://pyxml.sourceforge.net]{PyXML}{Users that require a full-featured implementation of DOM should use the PyXML package.} - \seetitle[http://cgi.omg.org/cgi-bin/doc?orbos/99-08-02.pdf]{CORBA - Scripting with Python} + \seetitle[http://www.omg.org/docs/formal/02-11-05.pdf]{Python + Language Mapping Specification} {This specifies the mapping from OMG IDL to Python.} \end{seealso} @@ -213,6 +213,24 @@ \class{DOMImplementation} as well. \begin{methoddesc}[DOMImplementation]{hasFeature}{feature, version} +Return true if the feature identified by the pair of strings +\var{feature} and \var{version} is implemented. +\end{methoddesc} + +\begin{methoddesc}[DOMImplementation]{createDocument}{namespaceUri, qualifiedName, doctype} +Return a new \class{Document} object (the root of the DOM), with a +child \class{Element} object having the given \var{namespaceUri} and +\var{qualifiedName}. The \var{doctype} must be a \class{DocumentType} +object created by \method{createDocumentType()}, or \code{None}. +In the Python DOM API, the first two arguments can also be \code{None} +in order to indicate that no \class{Element} child is to be created. 
+\end{methoddesc} + +\begin{methoddesc}[DOMImplementation]{createDocumentType}{qualifiedName, publicId, systemId} +Return a new \class{DocumentType} object that encapsulates the given +\var{qualifiedName}, \var{publicId}, and \var{systemId} strings, +representing the information contained in an XML document type +declaration. \end{methoddesc} @@ -545,8 +563,19 @@ Same as equivalent method in the \class{Document} class. \end{methoddesc} -\begin{methoddesc}[Element]{getAttribute}{attname} -Return an attribute value as a string. +\begin{methoddesc}[Element]{hasAttribute}{name} +Returns true if the element has an attribute named by \var{name}. +\end{methoddesc} + +\begin{methoddesc}[Element]{hasAttributeNS}{namespaceURI, localName} +Returns true if the element has an attribute named by +\var{namespaceURI} and \var{localName}. +\end{methoddesc} + +\begin{methoddesc}[Element]{getAttribute}{name} +Return the value of the attribute named by \var{name} as a +string. If no such attribute exists, an empty string is returned, +as if the attribute had no value. \end{methoddesc} \begin{methoddesc}[Element]{getAttributeNode}{attrname} @@ -555,8 +584,9 @@ \end{methoddesc} \begin{methoddesc}[Element]{getAttributeNS}{namespaceURI, localName} -Return an attribute value as a string, given a \var{namespaceURI} and -\var{localName}. +Return the value of the attribute named by \var{namespaceURI} and +\var{localName} as a string. If no such attribute exists, an empty +string is returned, as if the attribute had no value. \end{methoddesc} \begin{methoddesc}[Element]{getAttributeNodeNS}{namespaceURI, localName} @@ -564,7 +594,7 @@ \var{localName}. \end{methoddesc} -\begin{methoddesc}[Element]{removeAttribute}{attname} +\begin{methoddesc}[Element]{removeAttribute}{name} Remove an attribute by name. No exception is raised if there is no matching attribute. \end{methoddesc} @@ -579,7 +609,7 @@ qname. No exception is raised if there is no matching attribute. 
\end{methoddesc} -\begin{methoddesc}[Element]{setAttribute}{attname, value} +\begin{methoddesc}[Element]{setAttribute}{name, value} Set an attribute value from a string. \end{methoddesc} @@ -886,8 +916,13 @@ \keyword{readonly} may not restrict write access in all DOM implementations. -Additionally, the accessor functions are not required. If provided, +In the Python DOM API, accessor functions are not required. If provided, they should take the form defined by the Python IDL mapping, but these methods are considered unnecessary since the attributes are accessible directly from Python. ``Set'' accessors should never be provided for \keyword{readonly} attributes. + +The IDL definitions do not fully embody the requirements of the W3C DOM +API, such as the notion of certain objects, such as the return value of +\method{getElementsByTagName()}, being ``live''. The Python DOM API +does not require implementations to enforce such requirements. From jhylton at users.sourceforge.net Sun Oct 16 07:24:38 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:38 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc developers.txt, NONE, 1.17.2.2 ACKS, 1.185.2.3, 1.185.2.4 NEWS, 1.436.2.2, 1.436.2.3 SpecialBuilds.txt, 1.15.4.2, 1.15.4.3 cheatsheet, 1.2.18.2, 1.2.18.3 gdbinit, 1.2.24.1, 1.2.24.2 python.man, 1.24.2.2, 1.24.2.3 valgrind-python.supp, 1.1.6.1, 1.1.6.2 vimrc, 1.4.4.1, 1.4.4.2 Message-ID: <20051016052438.4E1E61E400D@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Misc Modified Files: Tag: ast-branch ACKS NEWS SpecialBuilds.txt cheatsheet gdbinit python.man valgrind-python.supp vimrc Added Files: Tag: ast-branch developers.txt Log Message: Merge head to branch (for the last time) --- NEW FILE: developers.txt --- Developer Log ============= This file is a running log of developers given permissions on SourceForge. 
The purpose is to provide some institutional memory of who was given access and why. The first entry starts in April 2005. In keeping with the style of Misc/NEWS, newer entries should be added to the top. Any markup should be in the form of ReST. Entries should include the initials of the project admin who made the change or granted access. Feel free to revise the format to accommodate documentation needs as they arise. Permissions History ------------------- - Added two new developers for the Summer of Code project. 8 July 2005 by RDH. Andrew Kuchling will be mentoring Gregory K Johnson for a project to enhance mailbox. Brett Cannon requested access for Flovis Bruynooghe (sirolf) to work on pstats, profile, and hotshot. Both users are expected to work primarily in nondist/sandbox and have their work reviewed before making updates to active code. - Reinhold Birkenfeld was given SF tracker permissions on 28 May 2005 by RDH. Since the beginning of 2005, he has been active in discussions on python-dev and has submitted a dozen patch reviews. The permissions add the ability to change tracker status and to attach patches. On 3 June 2005, this was expanded by RDH to include checkin permissions. - Terry Reedy was given SF tracker permissions on 7 Apr 2005 by RDH. - Nick Coghlan was given SF tracker permissions on 5 Apr 2005 by RDH. For several months, he has been active in reviewing and contributing patches. The added permissions give him greater flexibility in working with the tracker. - Eric Price was made a developer on 2 May 2003 by TGP. This was specifically to work on the new ``decimal`` package, which lived in ``nondist/sandbox/decimal/`` at the time. - Eric S. Raymond was made a developer on 2 Jul 2000 by TGP, for general library work. His request is archived here: http://mail.python.org/pipermail/python-dev/2000-July/005314.html Permissions Dropped on Request ------------------------------ - Johannes Gijsbers sent a drop request.
27 July 2005 RDH - Flovis Bruynooghe sent a drop request. 14 July 2005 RDH - Paul Prescod sent a drop request. 30 Apr 2005 RDH - Finn Bock sent a drop request. 13 Apr 2005 RDH - Eric Price sent a drop request. 10 Apr 2005 RDH - Irmen de Jong requested dropping CVS access while keeping tracker access. 10 Apr 2005 RDH - Moshe Zadka and Ken Manheimer sent drop requests. 8 Apr 2005 by RDH - Steve Holden, Gerhard Haring, and David Cole sent email stating that they no longer use their access. 7 Apr 2005 RDH Permissions Dropped after Loss of Contact ----------------------------------------- - Several unsuccessful efforts were made to contact Charles G Waldman. Removed on 8 Apr 2005 by RDH. Initials of Project Admins -------------------------- RDH: Raymond Hettinger TGP: Tim Peters Index: ACKS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/ACKS,v retrieving revision 1.185.2.3 retrieving revision 1.185.2.4 diff -u -d -r1.185.2.3 -r1.185.2.4 --- ACKS 2 Apr 2005 19:00:34 -0000 1.185.2.3 +++ ACKS 16 Oct 2005 05:24:03 -0000 1.185.2.4 @@ -50,6 +50,7 @@ Thomas Bellman Juan M. Bello Rivas Alexander Belopolsky +Andrew Bennetts Andy Bensky Michel Van den Bergh Eric Beser @@ -66,6 +67,7 @@ Paul Boddie Matthew Boedicker David Bolen +Gregory Bond Jurjen Bos Peter Bosch Eric Bouck @@ -304,6 +306,7 @@ Jiba Orjan Johansen Simon Johnston +Evan Jones Richard Jones Irmen de Jong Lucas de Jonge @@ -314,6 +317,7 @@ Bob Kahn Kurt B. Kaiser Tamito Kajiyama +Peter van Kampen Jacob Kaplan-Moss Lou Kates Sebastien Keim @@ -400,6 +404,7 @@ Mike Meyer Steven Miale Trent Mick +Chad Miller Roman Milner Dom Mitchell Doug Moen @@ -505,6 +510,7 @@ Saskia van Rossum Donald Wallace Rouse II Liam Routt +Paul Rubin Audun S. 
Runde Jeff Rush Sam Rushing @@ -549,6 +555,7 @@ Paul Sokolovsky Clay Spence Per Spilling +Joshua Spoerri Noah Spurrier Oliver Steele Greg Stein Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.436.2.2 retrieving revision 1.436.2.3 diff -u -d -r1.436.2.2 -r1.436.2.3 --- NEWS 7 Jan 2005 07:02:45 -0000 1.436.2.2 +++ NEWS 16 Oct 2005 05:24:03 -0000 1.436.2.3 @@ -7,19 +7,238 @@ What's New in Python 2.5 alpha 1? ================================= +*Release date: XX-XXX-2006* + Core and builtins ----------------- +- SF Bug #976608: fix SystemError when mtime of an imported file is -1. + +- SF Bug #887946: fix segfault when redirecting stdin from a directory. + Provide a warning when a directory is passed on the command line. + +- Fix segfault with invalid coding. + +- SF bug #772896: unknown encoding results in MemoryError. + +- All iterators now have a Boolean value of true. Formerly, some iterators + supported a __len__() method which evaluated to False when the iterator + was empty. + +- On 64-bit platforms, when __len__() returns a value that cannot be + represented as a C int, raise OverflowError. + +- test__locale is skipped on OS X < 10.4 (only partial locale support is + present). + +- SF bug #893549: parsing keyword arguments was broken with a few format + codes. + +- Changes donated by Elemental Security to make it work on AIX 5.3 + with IBM's 64-bit compiler (SF patch #1284289). This also closes SF + bug #105470: test_pwd fails on 64bit system (Opteron). + +- Changes donated by Elemental Security to make it work on HP-UX 11 on + Itanium2 with HP's 64-bit compiler (SF patch #1225212). + +- Disallow keyword arguments for type constructors that don't use them + (fixes bug #1119418). + +- Forward UnicodeDecodeError into SyntaxError for source encoding errors. + +- SF bug #900092: When tracing (e.g. 
for hotshot), restore 'return' events for + exceptions that cause a function to exit. + +- The implementation of set() and frozenset() was revised to use its + own internal data structure. Memory consumption is reduced by 1/3 + and there are modest speed-ups as well. The API is unchanged. + +- SF bug #1238681: freed pointer is used in longobject.c:long_pow(). + +- SF bug #1229429: PyObject_CallMethod failed to decrement some + reference counts in some error exit cases. + +- SF bug #1185883: Python's small-object memory allocator took over + a block managed by the platform C library whenever a realloc specified + a small new size. However, there's no portable way to know then how + much of the address space following the pointer is valid, so no + portable way to copy data from the C-managed block into Python's + small-object space without risking a memory fault. Python's small-object + realloc now leaves such blocks under the control of the platform C + realloc. + +- SF bug #1232517: An overflow error was not detected properly when + attempting to convert a large float to an int in os.utime(). + +- SF bug #1224347: hex longs now print with lowercase letters just + like their int counterparts. + +- SF bug #1163563: the original fix for bug #1010677 ("thread Module + Breaks PyGILState_Ensure()") broke badly in the case of multiple + interpreter states; back out that fix and do a better job (see + http://mail.python.org/pipermail/python-dev/2005-June/054258.html + for a longer write-up of the problem). + +- SF patch #1180995: marshal now uses a binary format by default when + serializing floats. + +- SF patch #1181301: on platforms that appear to use IEEE 754 floats, + the routines that promise to produce IEEE 754 binary representations + of floats now simply copy bytes around. + +- bug #967182: disallow opening files with 'wU' or 'aU' as specified by PEP + 278. 
+ +- patch #1109424: int, long, float, complex, and unicode now check for the + proper magic slot for type conversions when subclassed. Previously the + magic slot was ignored during conversion. Semantics now match the way + subclasses of str always behaved. int/long/float, conversion of an instance + to the base class has been moved to the proper nb_* magic slot and out of + PyNumber_*(). + Thanks Walter Dörwald. + +- Descriptors defined in C with a PyGetSetDef structure, where the setter is + NULL, now raise an AttributeError when attempting to set or delete the + attribute. Previously a TypeError was raised, but this was inconsistent + with the equivalent pure-Python implementation. + +- It is now safe to call PyGILState_Release() before + PyEval_InitThreads() (note that if there is reason to believe there + are multiple threads around you still must call PyEval_InitThreads() + before using the Python API; this fix is for extension modules that + have no way of knowing if Python is multi-threaded yet). + +- Typing Ctrl-C whilst raw_input() was waiting in a build with threads + disabled caused a crash. + +- Bug #1165306: instancemethod_new allowed the creation of a method + with im_class == im_self == NULL, which caused a crash when called. + +- Move exception finalisation later in the shutdown process - this + fixes the crash seen in bug #1165761 + +- Added two new builtins, any() and all(). + +- Defining a class with empty parentheses is now allowed + (e.g., ``class C(): pass`` is no longer a syntax error). + Patch #1176012 added support to the 'parser' module and 'compiler' package + (thanks to logistix for that added support). + +- Patch #1115086: Support PY_LONGLONG in structmember. + +- Bug #1155938: new style classes did not check that __init__() was + returning None. + +- Patch #802188: Report characters after line continuation character + ('\') with a specific error message. + +- Bug #723201: Raise a TypeError for passing bad objects to 'L' format.
+ +- Bug #1124295: the __name__ attribute of file objects was + inadvertently made inaccessible in restricted mode. + +- Bug #1074011: closing sys.std{out,err} now causes a flush() and + an ferror() call. + - min() and max() now support key= arguments with the same meaning as in list.sort(). - The peephole optimizer now performs simple constant folding in expressions: (2+3) --> (5). +- set and frozenset objects can now be marshalled. SF #1098985. + +- Bug #1077106: Poor argument checking could cause memory corruption + in calls to os.read(). + +- The parser did not complain about future statements in illegal + positions. It once again reports a syntax error if a future + statement occurs after anything other than a doc string. + +- Change the %s format specifier for str objects so that it returns a + unicode instance if the argument is not an instance of basestring and + calling __str__ on the argument returns a unicode instance. Extension Modules ----------------- +- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, + but Python incorrectly assumes it is in UTF-8 format + +- Fix parse errors in the readline module when compiling without threads. + +- Patch #1288833: Removed thread lock from socket.getaddrinfo on + FreeBSD 5.3 and later versions which got thread-safe getaddrinfo(3). + +- Patches #1298449 and #1298499: Add some missing checks for error + returns in cStringIO.c. + +- Patch #1297028: fix segfault if call type on MultibyteCodec, + MultibyteStreamReader, or MultibyteStreamWriter + +- Fix memory leak in posix.access(). + +- Patch #1213831: Fix typo in unicodedata._getcode. + +- Bug #1007046: os.startfile() did not accept unicode strings encoded in + the file system encoding. + +- Patch #756021: Special-case socket.inet_aton('255.255.255.255') for + platforms that don't have inet_aton(). + +- Bug #1215928: Fix bz2.BZ2File.seek() for 64-bit file offsets. 
+ +- Bug #1191043: Fix bz2.BZ2File.(x)readlines for files containing one + line without newlines. + +- Bug #728515: mmap.resize() now resizes the file on Unix as it did + on Windows. + +- Patch #1180695: Add nanosecond stat resolution, and st_gen, + st_birthtime for FreeBSD. + +- Patch #1231069: The fcntl.ioctl function now uses the 'I' code for + the request code argument, which results in more C-like behaviour + for large or negative values. + +- Bug #1234979: For the argument of thread.Lock.acquire, the Windows + implementation treated all integer values except 1 as false. + +- Bug #1194181: bz2.BZ2File didn't handle mode 'U' correctly. + +- Patch #1212117: os.stat().st_flags is now accessible as a attribute + if available on the platform. + +- Patch #1103951: Expose O_SHLOCK and O_EXLOCK in the posix module if + available on the platform. + +- Bug #1166660: The readline module could segfault if hook functions + were set in a different thread than that which called readline. + +- collections.deque objects now support a remove() method. + +- operator.itemgetter() and operator.attrgetter() now support retrieving + multiple fields. This provides direct support for sorting on multiple + keys (primary, secondary, etc). + +- os.access now supports Unicode path names on non-Win32 systems. + +- Patches #925152, #1118602: Avoid reading after the end of the buffer + in pyexpat.GetInputContext. + +- Patches #749830, #1144555: allow UNIX mmap size to default to current + file size. + +- Added functional.partial(). See PEP309. + +- Patch #1093585: raise a ValueError for negative history items in readline. + {remove_history,replace_history} + +- The spwd module has been added, allowing access to the shadow password + database. + +- stat_float_times is now True. + - array.array objects are now picklable. 
- the cPickle module no longer accepts the deprecated None option in the @@ -29,17 +248,219 @@ This allows islice() to work more readily with slices: islice(s.start, s.stop, s.step) +- datetime.datetime() now has a strptime class method which can be used to + create datetime object using a string and format. Library ------- +- Patch #754022: Greatly enhanced webbrowser.py (by Oleg Broytmann). + +- Bug #729103: pydoc.py: Fix docother() method to accept additional + "parent" argument. + +- Patch #1300515: xdrlib.py: Fix pack_fstring() to really use null bytes + for padding. + +- Bug #1296004: httplib.py: Limit maximal amount of data read from the + socket to avoid a MemoryError on Windows. + +- Patch #1166948: locale.py: Prefer LC_ALL, LC_CTYPE and LANG over LANGUAGE + to get the correct encoding. + +- Patch #1166938: locale.py: Parse LANGUAGE as a colon separated list of + languages. + +- Patch #1268314: Cache lines in StreamReader.readlines for performance. + +- Bug #1290505: Fix clearing the regex cache for time.strptime(). + +- Bug #1167128: Fix size of a symlink in a tarfile to be 0. + +- Patch #810023: Fix off-by-one bug in urllib.urlretrieve reporthook + functionality. + +- Bug #1163178: Make IDNA return an empty string when the input is empty. + +- Patch #848017: Make Cookie more RFC-compliant. Use CRLF as default output + separator and do not output trailing semicola. + +- Patch #1062060: urllib.urlretrieve() now raises a new exception, named + ContentTooShortException, when the actually downloaded size does not + match the Content-Length header. + +- Bug #1121494: distutils.dir_utils.mkpath now accepts Unicode strings. + +- Bug #1178484: Return complete lines from codec stream readers + even if there is an exception in later lines, resulting in + correct line numbers for decoding errors in source code. + +- Bug #1192315: Disallow negative arguments to clear() in pdb. + +- Patch #827386: Support absolute source paths in msvccompiler.py. 
+ +- Patch #1105730: Apply the new implementation of commonprefix in posixpath + to ntpath, macpath, os2emxpath and riscospath. + +- Fix a problem in Tkinter introduced by SF patch #869468: delete bogus + __hasattr__ and __delattr__ methods on class Tk that were breaking + Tkdnd. + +- Bug #1015140: disambiguated the term "article id" in nntplib docs and + docstrings to either "article number" or "message id". + +- Bug #1238170: threading.Thread.__init__ no longer has "kwargs={}" as a + parameter, but uses the usual "kwargs=None". + +- textwrap now processes text chunks at O(n) speed instead of O(n**2). + Patch #1209527 (Contributed by Connelly). + +- urllib2 has now an attribute 'httpresponses' mapping from HTTP status code + to W3C name (404 -> 'Not Found'). RFE #1216944. + +- Bug #1177468: Don't cache the /dev/urandom file descriptor for os.urandom, + as this can cause problems with apps closing all file descriptors. + +- Bug #839151: Fix an attempt to access sys.argv in the warnings module + it can be missing in embedded interpreters + +- Bug #1155638: Fix a bug which affected HTTP 0.9 responses in httplib. + +- Bug #1100201: Cross-site scripting was possible on BaseHTTPServer via + error messages. + +- Bug #1108948: Cookie.py produced invalid JavaScript code. + +- The tokenize module now detects and reports indentation errors. + Bug #1224621. + +- The tokenize module has a new untokenize() function to support a full + roundtrip from lexed tokens back to Python sourcecode. In addition, + the generate_tokens() function now accepts a callable argument that + terminates by raising StopIteration. + +- Bug #1196315: fix weakref.WeakValueDictionary constructor. + +- Bug #1213894: os.path.realpath didn't resolve symlinks that were the first + component of the path. + +- Patch #1120353: The xmlrpclib module provides better, more transparent, + support for datetime.{datetime,date,time} objects. 
With use_datetime set + to True, applications shouldn't have to fiddle with the DateTime wrapper + class at all. + +- distutils.commands.upload was added to support uploading distribution + files to PyPI. + +- distutils.commands.register now encodes the data as UTF-8 before posting + them to PyPI. + +- decimal operator and comparison methods now return NotImplemented + instead of raising a TypeError when interacting with other types. This + allows other classes to implement __radd__ style methods and have them + work as expected. + +- Bug #1163325: Decimal infinities failed to hash. Attempting to + hash a NaN raised an InvalidOperation instead of a TypeError. + +- Patch #918101: Add tarfile open mode r|* for auto-detection of the + stream compression; add, for symmetry reasons, r:* as a synonym of r. + +- Patch #1043890: Add extractall method to tarfile. + +- Patch #1075887: Don't require MSVC in distutils if there is nothing + to build. + +- Patch #1103407: Properly deal with tarfile iterators when untarring + symbolic links on Windows. + +- Patch #645894: Use getrusage for computing the time consumption in + profile.py if available. + +- Patch #1046831: Use get_python_version where appropriate in sysconfig.py. + +- Patch #1117454: Remove code to special-case cookies without values + in LWPCookieJar. + +- Patch #1117339: Add cookielib special name tests. + +- Patch #1112812: Make bsddb/__init__.py more friendly for modulefinder. + +- Patch #1110248: SYNC_FLUSH the zlib buffer for GZipFile.flush. + +- Patch #1107973: Allow to iterate over the lines of a tarfile.ExFileObject. + +- Patch #1104111: Alter setup.py --help and --help-commands. + +- Patch #1121234: Properly cleanup _exit and tkerror commands. + +- Patch #1049151: xdrlib now unpacks booleans as True or False. + +- Fixed bug in a NameError bug in cookielib. Patch #1116583. + +- Applied a security fix to SimpleXMLRPCserver (PSF-2005-001). 
This + disables recursive traversal through instance attributes, which can + be exploited in various ways. + +- Bug #1110478: Revert os.environ.update to do putenv again. + +- Bug #1103844: fix distutils.install.dump_dirs() with negated options. + +- os.{SEEK_SET, SEEK_CUR, SEEK_END} have been added for convenience. + +- Enhancements to the csv module: + + + Dialects are now validated by the underlying C code, better + reflecting its capabilities, and improving its compliance with + PEP 305. + + Dialect parameter parsing has been re-implemented to improve error + reporting. + + quotechar=None and quoting=QUOTE_NONE now work the way PEP 305 + dictates. + + the parser now removes the escapechar prefix from escaped characters. + + when quoting=QUOTE_NONNUMERIC, the writer now tests for numeric + types, rather than any object than can be represented as a numeric. + + when quoting=QUOTE_NONNUMERIC, the reader now casts unquoted fields + to floats. + + reader now allows \r characters to be quoted (previously it only allowed + \n to be quoted). + + writer doublequote handling improved. + + Dialect classes passed to the module are no longer instantiated by + the module before being parsed (the former validation scheme required + this, but the mechanism was unreliable). + + The dialect registry now contains instances of the internal + C-coded dialect type, rather than references to python objects. + + the internal c-coded dialect type is now immutable. + + register_dialect now accepts the same keyword dialect specifications + as the reader and writer, allowing the user to register dialects + without first creating a dialect class. + + a configurable limit to the size of parsed fields has been added - + previously, an unmatched quote character could result in the entire + file being read into the field buffer before an error was reported. + + A new module method csv.field_size_limit() has been added that sets + the parser field size limit (returning the former limit). 
The initial + limit is 128kB. + + A line_num attribute has been added to the reader object, which tracks + the number of lines read from the source iterator. This is not + the same as the number of records returned, as records can span + multiple lines. + + reader and writer objects were not being registered with the cyclic-GC. + This has been fixed. + +- _DummyThread objects in the threading module now delete self.__block that is + inherited from _Thread since it uses up a lock allocated by 'thread'. The + lock primitives tend to be limited in number and thus should not be wasted on + a _DummyThread object. Fixes bug #1089632. + +- The imghdr module now detects Exif files. + - StringIO.truncate() now correctly adjusts the size attribute. (Bug #951915). - locale.py now uses an updated locale alias table (built using Tools/i18n/makelocalealias.py, a tool to parse the X11 locale alias file); the encoding lookup was enhanced to use Python's - encoding alias table + encoding alias table. - moved deprecated modules to Lib/lib-old: whrandom, tzparse, statcache. @@ -70,10 +491,58 @@ - Bug #1083110: ``zlib.decompress.flush()`` would segfault if called immediately after creating the object, without any intervening ``.decompress()`` calls. +- The reconvert.quote function can now emit triple-quoted strings. The + reconvert module now has some simple documentation. + +- ``UserString.MutableString`` now supports negative indices in + ``__setitem__`` and ``__delitem__`` + +- Bug #1149508: ``textwrap`` now handles hyphenated numbers (eg. "2004-03-05") + correctly. + +- Partial fixes for SF bugs #1163244 and #1175396: If a chunk read by + ``codecs.StreamReader.readline()`` has a trailing "\r", read one more + character even if the user has passed a size parameter to get a proper + line ending. Remove the special handling of a "\r\n" that has been split + between two lines. 
+ +- Bug #1251300: On UCS-4 builds the "unicode-internal" codec will now complain + about illegal code points. The codec now supports PEP 293 style error + handlers. + +- Bug #1235646: ``codecs.StreamRecoder.next()`` now reencodes the data it reads + from the input stream, so that the output is a byte string in the correct + encoding instead of a unicode string. + +- Bug #1202493: Fixing SRE parser to handle '{}' as perl does, rather than + considering it exactly like a '*'. + +- Bug #1245379: Add "unicode-1-1-utf-7" as an alias for "utf-7" to + ``encodings.aliases``. Build ----- +- Bug #1189330: configure did not correctly determine the necessary + value of LINKCC if python was built with GCC 4.0. + +- Upgrade Windows build to zlib 1.2.3 which eliminates a potential security + vulnerability in zlib 1.2.1 and 1.2.2. + +- EXTRA_CFLAGS has been introduced as an environment variable to hold compiler + flags that change binary compatibility. Changes were also made to + distutils.sysconfig to also use the environment variable when used during + compilation of the interpreter and of C extensions through distutils. + +- SF patch 1171735: Darwin 8's headers are anal about POSIX compliance, + and linking has changed (prebinding is now deprecated, and libcc_dynamic + no longer exists). This configure patch makes things right. + +- Bug #1158607: Build with --disable-unicode again. + +- spwdmodule.c is built only if either HAVE_GETSPNAM or HAVE_HAVE_GETSPENT is + defined. Discovered as a result of not being able to build on OS X. + - setup.py now uses the directories specified in LDFLAGS using the -L option and in CPPFLAGS using the -I option for adding library and include directories, respectively, for compiling extension modules against. This has @@ -83,25 +552,80 @@ ``-L/opt/local/lib`` for DarwinPorts) and CPPFLAGS (``-I/sw/include`` for Fink, ``-I/opt/local/include`` for DarwinPorts). 
+- Test in configure.in that checks for tzset no longer dependent on tm->tm_zone + to exist in the struct (not required by either ISO C nor the UNIX 2 spec). + Tests for sanity in tzname when HAVE_TZNAME defined were also defined. + Closes bug #1096244. Thanks Gregory Bond. + C API ----- +- Added a C API for set and frozenset objects. + - Removed PyRange_New(). +- Patch #1313939: PyUnicode_DecodeCharmap() accepts a unicode string as the + mapping argument now. This string is used as a mapping table. Byte values + greater than the length of the string and 0xFFFE are treated as undefined + mappings. + Tests ----- +- In test_os, st_?time is now truncated before comparing it with ST_?TIME. + + +Documentation +------------- + +- Bug #1274828: Document os.path.splitunc(). + +- Bug #1190204: Clarify which directories are searched by site.py. + +- Bug #1193849: Clarify os.path.expanduser() documentation. + +- Bug #1243192: re.UNICODE and re.LOCALE affect \d, \D, \s and \S. + +- Bug #755617: Document the effects of os.chown() on Windows. + +- Patch #1180012: The documentation for modulefinder is now in the library reference. + +- Patch #1213031: Document that os.chown() accepts argument values of -1. + +- Bug #1190563: Document os.waitpid() return value with WNOHANG flag. + +- Bug #1175022: Correct the example code for property(). + +- Document the IterableUserDict class in the UserDict module. + Closes bug #1166582. + +- Remove all latent references for "Macintosh" that referred to semantics for + Mac OS 9 and change to reflect the state for OS X. + Closes patch #1095802. Thanks Jack Jansen. Mac --- +New platforms +------------- + +- FreeBSD 7 support is added. + Tools/Demos ----------- +- Added two new files to Tools/scripts: pysource.py, which recursively + finds Python source files, and findnocoding.py, which finds Python + source files that need an encoding declaration. + Patch #784089, credits to Oleg Broytmann. 
+ +- Bug #1072853: pindent.py used an uninitialized variable. + +- Patch #1177597: Correct Complex.__init__. What's New in Python 2.4 final? @@ -557,7 +1081,7 @@ - The decimal package's Context.copy() method now returns deep copies. - Deprecated sys.exitfunc in favor of the atexit module. The sys.exitfunc - attribute will be kept around for backwards compatability and atexit + attribute will be kept around for backwards compatibility and atexit will just become the one preferred way to do it. - patch #675551: Add get_history_item and replace_history_item functions @@ -1024,7 +1548,7 @@ - Optimized list resize operations to make fewer calls to the system realloc(). Significantly speeds up list appends, list pops, - list comprehensions, and the list contructor (when the input iterable + list comprehensions, and the list constructor (when the input iterable length is not known). - Changed the internal list over-allocation scheme. For larger lists, @@ -1303,6 +1827,8 @@ Library ------- +- Bug #1266283: The new function "lexists" is now in os.path.__all__. + - Bug #981530: Fix UnboundLocalError in shutil.rmtree(). This affects the documented behavior: the function passed to the onerror() handler can now also be os.listdir. Index: SpecialBuilds.txt =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/SpecialBuilds.txt,v retrieving revision 1.15.4.2 retrieving revision 1.15.4.3 diff -u -d -r1.15.4.2 -r1.15.4.3 --- SpecialBuilds.txt 7 Jan 2005 07:02:48 -0000 1.15.4.2 +++ SpecialBuilds.txt 16 Oct 2005 05:24:03 -0000 1.15.4.3 @@ -1,6 +1,9 @@ This file describes some special Python build types enabled via compile-time preprocessor defines. +It is best to define these options in the EXTRA_FLAGS environment variable; +``EXTRA_CFLAGS="-DPy_REF_DEBUG" ./configure``. 
+ --------------------------------------------------------------------------- Py_REF_DEBUG introduced in 1.4 named REF_DEBUG before 1.4 @@ -207,14 +210,14 @@ --------------------------------------------------------------------------- LLTRACE introduced well before 1.0 -Compile in support of Low Level TRACE-ing of the main interpreter loop. +Compile in support for Low Level TRACE-ing of the main interpreter loop. -When this preprocessor symbol is defined, before eval_frame -(eval_code2 before 2.2) executes a frame's code it checks the frame's -global namespace for a variable "__lltrace__". If such a variable is -found, mounds of information about what the interpreter is doing are -sprayed to stdout, such as every opcode and opcode argument and values -pushed onto and popped off the value stack. +When this preprocessor symbol is defined, before PyEval_EvalFrame +(eval_frame in 2.3 and 2.2, eval_code2 before that) executes a frame's code +it checks the frame's global namespace for a variable "__lltrace__". If +such a variable is found, mounds of information about what the interpreter +is doing are sprayed to stdout, such as every opcode and opcode argument +and values pushed onto and popped off the value stack. Not useful very often, but very useful when needed. @@ -245,8 +248,8 @@ On the PowerPC the rate at which the time base register is incremented is not defined by the architecture specification, so you'll need to -find the manual for your specific processor. For the 750CX, 750CXe, -750FX (all sold as the G3) we find: +find the manual for your specific processor. For the 750CX, 750CXe +and 750FX (all sold as the G3) we find: The time base counter is clocked at a frequency that is one-fourth that of the bus clock. 
Index: cheatsheet =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/cheatsheet,v retrieving revision 1.2.18.2 retrieving revision 1.2.18.3 diff -u -d -r1.2.18.2 -r1.2.18.3 --- cheatsheet 7 Jan 2005 07:02:48 -0000 1.2.18.2 +++ cheatsheet 16 Oct 2005 05:24:03 -0000 1.2.18.3 @@ -209,7 +209,7 @@ +x, -x, ~x Unary operators x**y Power x*y x/y x%y x//y mult, division, modulo, floor division - x+y x-y addition, substraction + x+y x-y addition, subtraction x<<y x>>y Bit shifting x&y Bitwise and x^y Bitwise exclusive or Index: gdbinit =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/gdbinit,v retrieving revision 1.2.24.1 retrieving revision 1.2.24.2 diff -u -d -r1.2.24.1 -r1.2.24.2 --- gdbinit 7 Jan 2005 07:02:48 -0000 1.2.24.1 +++ gdbinit 16 Oct 2005 05:24:03 -0000 1.2.24.2 @@ -43,6 +43,29 @@ end end +# A rewrite of the Python interpreter's line number calculator in GDB's +# command language +define lineno + set $__continue = 1 + set $__co = f->f_code + set $__lasti = f->f_lasti + set $__sz = ((PyStringObject *)$__co->co_lnotab)->ob_size/2 + set $__p = (unsigned char *)((PyStringObject *)$__co->co_lnotab)->ob_sval + set $__li = $__co->co_firstlineno + set $__ad = 0 + while ($__sz-1 >= 0 && $__continue) + set $__sz = $__sz - 1 + set $__ad = $__ad + *$__p + set $__p = $__p + 1 + if ($__ad > $__lasti) + set $__continue = 0 + end + set $__li = $__li + *$__p + set $__p = $__p + 1 + end + printf "%d", $__li +end + # print the current frame - verbose define pyframev pyframe @@ -52,7 +75,35 @@ define pyframe set $__fn = (char *)((PyStringObject *)co->co_filename)->ob_sval set $__n = (char *)((PyStringObject *)co->co_name)->ob_sval - printf "%s (%d): %s\n", $__fn, f->f_lineno, $__n + printf "%s (", $__fn + lineno + printf "): %s\n", $__n +### Uncomment these lines when using from within Emacs/XEmacs so it will +### automatically track/display the current 
Python source line +# printf "%c%c%s:", 032, 032, $__fn +# lineno +# printf ":1\n" +end + +### Use these at your own risk. It appears that a bug in gdb causes it +### to crash in certain circumstances. + +#define up +# up-silently 1 +# printframe +#end + +#define down +# down-silently 1 +# printframe +#end + +define printframe + if $pc > PyEval_EvalFrameEx && $pc < PyEval_EvalCodeEx + pyframe + else + frame + end end # Here's a somewhat fragile way to print the entire Python stack from gdb. @@ -64,7 +115,7 @@ # interpreter, but the test can be extended by an interested party). If # Py_Main <= $pc <= Py_GetArgcArv is true, $pc is in Py_Main(), so the while # tests succeeds as long as it's not true. In a similar fashion the if -# statement tests to see if we are in eval_frame(). +# statement tests to see if we are in PyEval_EvalFrame(). # print the entire Python call stack define pystack Index: python.man =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/python.man,v retrieving revision 1.24.2.2 retrieving revision 1.24.2.3 diff -u -d -r1.24.2.2 -r1.24.2.3 --- python.man 7 Jan 2005 07:02:50 -0000 1.24.2.2 +++ python.man 16 Oct 2005 05:24:03 -0000 1.24.2.3 @@ -253,7 +253,7 @@ .I command may contain multiple statements separated by newlines. Leading whitespace is significant in Python statements! -In non-interactive mode, the entire input is parsed befored it is +In non-interactive mode, the entire input is parsed before it is executed. .PP If available, the script name and additional arguments thereafter are @@ -330,7 +330,7 @@ Augments the default search path for module files. The format is the same as the shell's $PATH: one or more directory pathnames separated by colons. -Non-existant directories are silently ignored. +Non-existent directories are silently ignored. The default search path is installation dependent, but generally begins with ${prefix}/lib/python (see PYTHONHOME above). 
The default search path is always appended to $PYTHONPATH. Index: valgrind-python.supp =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/valgrind-python.supp,v retrieving revision 1.1.6.1 retrieving revision 1.1.6.2 diff -u -d -r1.1.6.1 -r1.1.6.2 --- valgrind-python.supp 7 Jan 2005 07:02:50 -0000 1.1.6.1 +++ valgrind-python.supp 16 Oct 2005 05:24:03 -0000 1.1.6.2 @@ -29,6 +29,12 @@ } { + ADDRESS_IN_RANGE/Invalid read of size 8 (x86_64) + Memcheck:Value8 + fun:Py_ADDRESS_IN_RANGE +} + +{ ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value Memcheck:Cond fun:Py_ADDRESS_IN_RANGE @@ -84,6 +90,118 @@ } +{ + Avoid problem in libc on gentoo + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Addr8 + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Addr8 + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problem in glibc on gentoo + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/libc-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_open + obj:/lib/libdl-2.3.4.so + 
obj:/lib/ld-2.3.4.so + obj:/lib/libdl-2.3.4.so + fun:dlopen +} + +{ + Avoid problems w/readline doing a putenv and leaking on exit + Memcheck:Leak + fun:malloc + fun:xmalloc + fun:sh_set_lines_and_columns + fun:_rl_get_screen_size + fun:_rl_init_terminal_io + obj:/lib/libreadline.so.4.3 + fun:rl_initialize + fun:setup_readline + fun:initreadline + fun:_PyImport_LoadDynamicModule + fun:load_module + fun:import_submodule + fun:load_next + fun:import_module_ex + fun:PyImport_ImportModuleEx +} + +{ + Mysterious leak that seems to deal w/pthreads + Memcheck:Leak + fun:calloc + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + fun:_dl_allocate_tls + fun:__pthread_initialize_minimal +} + +{ + Mysterious leak that seems to deal w/pthreads + Memcheck:Leak + fun:memalign + obj:/lib/ld-2.3.4.so + fun:_dl_allocate_tls + fun:__pthread_initialize_minimal +} + ### ### These occur from somewhere within the SSL, when running ### test_socket_sll. They are too general to leave on by default. Index: vimrc =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/vimrc,v retrieving revision 1.4.4.1 retrieving revision 1.4.4.2 diff -u -d -r1.4.4.1 -r1.4.4.2 --- vimrc 7 Jan 2005 07:02:50 -0000 1.4.4.1 +++ vimrc 16 Oct 2005 05:24:03 -0000 1.4.4.2 @@ -9,12 +9,11 @@ " " All setting are protected by 'au' ('autocmd') statements. Only files ending " in .py or .pyw will trigger the Python settings while files ending in *.c or -" *.h will trigger the C settings. This make the file "safe" in terms of only +" *.h will trigger the C settings. This makes the file "safe" in terms of only " adjusting settings for Python and C files. " " Only basic settings needed to enforce the style guidelines are set. " Some suggested options are listed but commented out at the end of this file. -" " Number of spaces to use for an indent. 
@@ -39,6 +38,12 @@ au BufRead,BufNewFile *.c,*.h set noexpandtab au BufRead,BufNewFile Makefile* set noexpandtab +" Use the below highlight group when displaying bad whitespace is desired +highlight BadWhitespace ctermbg=red guibg=red + +" Display tabs at the beginning of a line in Python mode as bad +au BufRead,BufNewFile *.py,*.pyw match BadWhitespace /^\t\+/ + " Wrap text after a certain number of characters " Python: 79 " C: 79 @@ -67,7 +72,7 @@ " Set the default file encoding to UTF-8: ``set encoding=utf-8`` -" Put a marker at the beginning of the file to differentiate between UTF and +" Puts a marker at the beginning of the file to differentiate between UTF and " UCS encoding (WARNING: can trick shells into thinking a text file is actually " a binary file when executing the text file): ``set bomb`` @@ -75,13 +80,16 @@ "``let python_highlight_all=1`` "``syntax on`` -" Automatically indent: ``filetype indent on`` +" Automatically indent based on file type: ``filetype indent on`` +" Keep indentation level from previous line: ``set autoindent`` " Folding based on indentation: ``set foldmethod=indent`` -" Make trailing whitespace explicit: -"highlight WhitespaceEOL ctermbg=red guibg=red -"match WhitespaceEOL /\s\+$/ -" or -"set list listchars=trail:- +" Make trailing whitespace explicit (left off since this will automatically +" insert the highlight or characters *as you type*, which can get annoying): +"``match BadWhitespace /\s\+$/`` +" +" or, for a non-colored, character-based solution: +" +"``set list listchars=trail:-`` From jhylton at users.sourceforge.net Sun Oct 16 07:24:40 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:40 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib hashlib.py, NONE, 1.2.2.2 md5.py, NONE, 1.1.2.2 sha.py, NONE, 1.1.2.2 BaseHTTPServer.py, 1.22.2.2, 1.22.2.3 ConfigParser.py, 1.44.2.2, 1.44.2.3 Cookie.py, 1.13.2.2, 1.13.2.3 DocXMLRPCServer.py, 1.2.4.2, 1.2.4.3 
SimpleXMLRPCServer.py, 1.2.12.2, 1.2.12.3 SocketServer.py, 1.31.2.2, 1.31.2.3 UserDict.py, 1.17.2.2, 1.17.2.3 UserString.py, 1.13.2.2, 1.13.2.3 _LWPCookieJar.py, 1.2.4.1, 1.2.4.2 _MozillaCookieJar.py, 1.3.4.1, 1.3.4.2 _strptime.py, 1.15.4.2, 1.15.4.3 asynchat.py, 1.19.2.2, 1.19.2.3 asyncore.py, 1.32.2.2, 1.32.2.3 base64.py, 1.13.16.1, 1.13.16.2 calendar.py, 1.28.2.2, 1.28.2.3 cgi.py, 1.74.2.2, 1.74.2.3 cgitb.py, 1.5.2.2, 1.5.2.3 codecs.py, 1.26.2.2, 1.26.2.3 cookielib.py, 1.4.4.1, 1.4.4.2 copy.py, 1.28.2.2, 1.28.2.3 csv.py, 1.12.2.1, 1.12.2.2 decimal.py, 1.32.2.1, 1.32.2.2 difflib.py, 1.10.2.2, 1.10.2.3 doctest.py, 1.24.2.2, 1.24.2.3 dumbdbm.py, 1.19.2.2, 1.19.2.3 ftplib.py, 1.72.2.1, 1.72.2.2 glob.py, 1.10.20.1, 1.10.20.2 gzip.py, 1.34.2.2, 1.34.2.3 hmac.py, 1.7.2.1, 1.7.2.2 httplib.py, 1.54.2.2, 1.54.2.3 imaplib.py, 1.54.2.2, 1.54.2.3 imghdr.py, 1.11, 1.11.26.1 inspect.py, 1.36.2.2, 1.36.2.3 locale.py, 1.20.2.2, 1.20.2.3 macpath.py, 1.39.2.2, 1.39.2.3 markupbase.py, 1.6.2.2, 1.6.2.3 mhlib.py, 1.34.2.2, 1.34.2.3 mimetypes.py, 1.22.2.2, 1.22.2.3 nntplib.py, 1.30.2.2, 1.30.2.3 ntpath.py, 1.49.2.2, 1.49.2.3 optparse.py, 1.4.4.2, 1.4.4.3 os.py, 1.58.2.3, 1.58.2.4 os2emxpath.py, 1.6.2.2, 1.6.2.3 pdb.py, 1.53.2.2, 1.53.2.3 pickletools.py, 1.26.6.2, 1.26.6.3 popen2.py, 1.25.2.1, 1.25.2.2 poplib.py, 1.21.2.1, 1.21.2.2 posixfile.py, 1.24.10.1, 1.24.10.2 posixpath.py, 1.51.2.2, 1.51.2.3 profile.py, 1.47.2.2, 1.47.2.3 py_compile.py, 1.21.2.1, 1.21.2.2 pydoc.py, 1.65.2.2, 1.65.2.3 random.py, 1.34.2.2, 1.34.2.3 reconvert.py, 1.6.16.1, 1.6.16.2 rfc822.py, 1.72.2.2, 1.72.2.3 sets.py, 1.43.4.2, 1.43.4.3 shutil.py, 1.22.2.2, 1.22.2.3 smtplib.py, 1.58.2.2, 1.58.2.3 socket.py, 1.21.2.2, 1.21.2.3 sre.py, 1.44.10.2, 1.44.10.3 sre_compile.py, 1.43.2.2, 1.43.2.3 sre_parse.py, 1.55.2.2, 1.55.2.3 subprocess.py, 1.13.2.1, 1.13.2.2 symbol.py, 1.14.12.2, 1.14.12.3 tarfile.py, 1.8.4.2, 1.8.4.3 telnetlib.py, 1.19.2.2, 1.19.2.3 tempfile.py, 1.39.2.2, 1.39.2.3 textwrap.py, 1.12.2.2, 1.12.2.3 
threading.py, 1.24.2.2, 1.24.2.3 tokenize.py, 1.32.2.2, 1.32.2.3 unittest.py, 1.16.2.2, 1.16.2.3 urllib.py, 1.148.2.2, 1.148.2.3 urllib2.py, 1.31.2.2, 1.31.2.3 urlparse.py, 1.32.2.2, 1.32.2.3 warnings.py, 1.16.2.2, 1.16.2.3 weakref.py, 1.17.2.2, 1.17.2.3 webbrowser.py, 1.32.2.2, 1.32.2.3 whichdb.py, 1.12.10.2, 1.12.10.3 xdrlib.py, 1.14.2.2, 1.14.2.3 xmlrpclib.py, 1.20.2.2, 1.20.2.3 zipfile.py, 1.24.2.2, 1.24.2.3 profile.doc, 1.2.32.1, NONE Message-ID: <20051016052440.0540E1E4009@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib Modified Files: Tag: ast-branch BaseHTTPServer.py ConfigParser.py Cookie.py DocXMLRPCServer.py SimpleXMLRPCServer.py SocketServer.py UserDict.py UserString.py _LWPCookieJar.py _MozillaCookieJar.py _strptime.py asynchat.py asyncore.py base64.py calendar.py cgi.py cgitb.py codecs.py cookielib.py copy.py csv.py decimal.py difflib.py doctest.py dumbdbm.py ftplib.py glob.py gzip.py hmac.py httplib.py imaplib.py imghdr.py inspect.py locale.py macpath.py markupbase.py mhlib.py mimetypes.py nntplib.py ntpath.py optparse.py os.py os2emxpath.py pdb.py pickletools.py popen2.py poplib.py posixfile.py posixpath.py profile.py py_compile.py pydoc.py random.py reconvert.py rfc822.py sets.py shutil.py smtplib.py socket.py sre.py sre_compile.py sre_parse.py subprocess.py symbol.py tarfile.py telnetlib.py tempfile.py textwrap.py threading.py tokenize.py unittest.py urllib.py urllib2.py urlparse.py warnings.py weakref.py webbrowser.py whichdb.py xdrlib.py xmlrpclib.py zipfile.py Added Files: Tag: ast-branch hashlib.py md5.py sha.py Removed Files: Tag: ast-branch profile.doc Log Message: Merge head to branch (for the last time) --- NEW FILE: hashlib.py --- # $Id: hashlib.py,v 1.2.2.2 2005/10/16 05:23:59 jhylton Exp $ # # Copyright (C) 2005 Gregory P. Smith (greg at electricrain.com) # Licensed to PSF under a Contributor Agreement. 
# __doc__ = """hashlib module - A common interface to many hash functions. new(name, string='') - returns a new hash object implementing the given hash function; initializing the hash using the given string data. Named constructor functions are also available, these are much faster than using new(): md5(), sha1(), sha224(), sha256(), sha384(), and sha512() More algorithms may be available on your platform but the above are guaranteed to exist. Choose your hash function wisely. Some have known weaknesses. sha384 and sha512 will be slow on 32 bit platforms. """ def __get_builtin_constructor(name): if name in ('SHA1', 'sha1'): import _sha return _sha.new elif name in ('MD5', 'md5'): import _md5 return _md5.new elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): import _sha256 bs = name[3:] if bs == '256': return _sha256.sha256 elif bs == '224': return _sha256.sha224 elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): import _sha512 bs = name[3:] if bs == '512': return _sha512.sha512 elif bs == '384': return _sha512.sha384 raise ValueError, "unsupported hash type" def __py_new(name, string=''): """new(name, string='') - Return a new hashing object using the named algorithm; optionally initialized with a string. """ return __get_builtin_constructor(name)(string) def __hash_new(name, string=''): """new(name, string='') - Return a new hashing object using the named algorithm; optionally initialized with a string. """ try: return _hashlib.new(name, string) except ValueError: # If the _hashlib module (OpenSSL) doesn't support the named # hash, try using our builtin implementations. # This allows for SHA224/256 and SHA384/512 support even though # the OpenSSL library prior to 0.9.8 doesn't provide them. 
return __get_builtin_constructor(name)(string) try: import _hashlib # use the wrapper of the C implementation new = __hash_new for opensslFuncName in filter(lambda n: n.startswith('openssl_'), dir(_hashlib)): funcName = opensslFuncName[len('openssl_'):] try: # try them all, some may not work due to the OpenSSL # version not supporting that algorithm. f = getattr(_hashlib, opensslFuncName) f() # Use the C function directly (very fast) exec funcName + ' = f' except ValueError: try: # Use the builtin implementation directly (fast) exec funcName + ' = __get_builtin_constructor(funcName)' except ValueError: # this one has no builtin implementation, don't define it pass # clean up our locals del f del opensslFuncName del funcName except ImportError: # We don't have the _hashlib OpenSSL module? # use the built in legacy interfaces via a wrapper function new = __py_new # lookup the C function to use directly for the named constructors md5 = __get_builtin_constructor('md5') sha1 = __get_builtin_constructor('sha1') sha224 = __get_builtin_constructor('sha224') sha256 = __get_builtin_constructor('sha256') sha384 = __get_builtin_constructor('sha384') sha512 = __get_builtin_constructor('sha512') --- NEW FILE: md5.py --- # $Id: md5.py,v 1.1.2.2 2005/10/16 05:23:59 jhylton Exp $ # # Copyright (C) 2005 Gregory P. Smith (greg at electricrain.com) # Licensed to PSF under a Contributor Agreement. from hashlib import md5 new = md5 blocksize = 1 # legacy value (wrong in any useful sense) digest_size = 16 --- NEW FILE: sha.py --- # $Id: sha.py,v 1.1.2.2 2005/10/16 05:23:59 jhylton Exp $ # # Copyright (C) 2005 Gregory P. Smith (greg at electricrain.com) # Licensed to PSF under a Contributor Agreement. 
from hashlib import sha1 as sha new = sha blocksize = 1 # legacy value (wrong in any useful sense) digest_size = 20 digestsize = 20 Index: BaseHTTPServer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/BaseHTTPServer.py,v retrieving revision 1.22.2.2 retrieving revision 1.22.2.3 diff -u -d -r1.22.2.2 -r1.22.2.3 --- BaseHTTPServer.py 7 Jan 2005 06:57:43 -0000 1.22.2.2 +++ BaseHTTPServer.py 16 Oct 2005 05:23:59 -0000 1.22.2.3 @@ -89,6 +89,8 @@ """ +def _quote_html(html): + return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;") class HTTPServer(SocketServer.TCPServer): @@ -336,8 +338,9 @@ message = short explain = long self.log_error("code %d, message %s", code, message) + # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201) content = (self.error_message_format % - {'code': code, 'message': message, 'explain': explain}) + {'code': code, 'message': _quote_html(message), 'explain': explain}) self.send_response(code, message) self.send_header("Content-Type", "text/html") self.send_header('Connection', 'close') Index: ConfigParser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/ConfigParser.py,v retrieving revision 1.44.2.2 retrieving revision 1.44.2.3 diff -u -d -r1.44.2.2 -r1.44.2.3 --- ConfigParser.py 7 Jan 2005 06:57:59 -0000 1.44.2.2 +++ ConfigParser.py 16 Oct 2005 05:23:59 -0000 1.44.2.3 @@ -28,7 +28,7 @@ create the parser and specify a dictionary of intrinsic defaults. The keys must be strings, the values must be appropriate for %()s string interpolation. Note that `__name__' is always an intrinsic default; - it's value is the section's name. + its value is the section's name.
sections() return all the configuration section names, sans DEFAULT Index: Cookie.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/Cookie.py,v retrieving revision 1.13.2.2 retrieving revision 1.13.2.3 diff -u -d -r1.13.2.2 -r1.13.2.3 --- Cookie.py 7 Jan 2005 06:58:00 -0000 1.13.2.2 +++ Cookie.py 16 Oct 2005 05:23:59 -0000 1.13.2.3 @@ -69,9 +69,8 @@ >>> C = Cookie.SmartCookie() >>> C["fig"] = "newton" >>> C["sugar"] = "wafer" - >>> print C - Set-Cookie: fig=newton; - Set-Cookie: sugar=wafer; + >>> C.output() + 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' Notice that the printable representation of a Cookie is the appropriate format for a Set-Cookie: header. This is the @@ -82,9 +81,9 @@ >>> C["rocky"] = "road" >>> C["rocky"]["path"] = "/cookie" >>> print C.output(header="Cookie:") - Cookie: rocky=road; Path=/cookie; + Cookie: rocky=road; Path=/cookie >>> print C.output(attrs=[], header="Cookie:") - Cookie: rocky=road; + Cookie: rocky=road The load() method of a Cookie extracts cookies from a string. In a CGI script, you would use this method to extract the cookies from the @@ -92,9 +91,8 @@ >>> C = Cookie.SmartCookie() >>> C.load("chips=ahoy; vienna=finger") - >>> print C - Set-Cookie: chips=ahoy; - Set-Cookie: vienna=finger; + >>> C.output() + 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' The load() method is darn-tootin smart about identifying cookies within a string. Escaped quotation marks, nested semicolons, and other @@ -103,7 +101,7 @@ >>> C = Cookie.SmartCookie() >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') >>> print C - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"; + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" Each element of the Cookie also supports all of the RFC 2109 Cookie attributes. 
Here's an example which sets the Path @@ -113,7 +111,7 @@ >>> C["oreo"] = "doublestuff" >>> C["oreo"]["path"] = "/" >>> print C - Set-Cookie: oreo=doublestuff; Path=/; + Set-Cookie: oreo=doublestuff; Path=/ Each dictionary element has a 'value' attribute, which gives you back the value associated with the key. @@ -144,9 +142,8 @@ '7' >>> C["string"].value 'seven' - >>> print C - Set-Cookie: number=7; - Set-Cookie: string=seven; + >>> C.output() + 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' SerialCookie @@ -165,9 +162,8 @@ 7 >>> C["string"].value 'seven' - >>> print C - Set-Cookie: number="I7\012."; - Set-Cookie: string="S'seven'\012p1\012."; + >>> C.output() + 'Set-Cookie: number="I7\\012."\r\nSet-Cookie: string="S\'seven\'\\012p1\\012."' Be warned, however, if SerialCookie cannot de-serialize a value (because it isn't a valid pickle'd object), IT WILL RAISE AN EXCEPTION. @@ -190,9 +186,8 @@ 7 >>> C["string"].value 'seven' - >>> print C - Set-Cookie: number="I7\012."; - Set-Cookie: string=seven; + >>> C.output() + 'Set-Cookie: number="I7\\012."\r\nSet-Cookie: string=seven' Backwards Compatibility @@ -228,6 +223,7 @@ "SmartCookie","Cookie"] _nulljoin = ''.join +_semispacejoin = '; '.join _spacejoin = ' '.join # @@ -470,9 +466,9 @@ def js_output(self, attrs=None): # Print javascript return """ - """ % ( self.OutputString(attrs), ) @@ -485,7 +481,7 @@ RA = result.append # First, the key=value pair - RA("%s=%s;" % (self.key, self.coded_value)) + RA("%s=%s" % (self.key, self.coded_value)) # Now add any defined attributes if attrs is None: @@ -496,16 +492,16 @@ if V == "": continue if K not in attrs: continue if K == "expires" and type(V) == type(1): - RA("%s=%s;" % (self._reserved[K], _getdate(V))) + RA("%s=%s" % (self._reserved[K], _getdate(V))) elif K == "max-age" and type(V) == type(1): - RA("%s=%d;" % (self._reserved[K], V)) + RA("%s=%d" % (self._reserved[K], V)) elif K == "secure": - RA("%s;" % self._reserved[K]) + RA(str(self._reserved[K])) else: - 
RA("%s=%s;" % (self._reserved[K], V)) + RA("%s=%s" % (self._reserved[K], V)) # Return the result - return _spacejoin(result) + return _semispacejoin(result) # end OutputString # end Morsel class @@ -581,7 +577,7 @@ self.__set(key, rval, cval) # end __setitem__ - def output(self, attrs=None, header="Set-Cookie:", sep="\n"): + def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): """Return a string suitable for HTTP.""" result = [] items = self.items() Index: DocXMLRPCServer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/DocXMLRPCServer.py,v retrieving revision 1.2.4.2 retrieving revision 1.2.4.3 diff -u -d -r1.2.4.2 -r1.2.4.3 --- DocXMLRPCServer.py 7 Jan 2005 06:58:00 -0000 1.2.4.2 +++ DocXMLRPCServer.py 16 Oct 2005 05:23:59 -0000 1.2.4.3 @@ -12,7 +12,6 @@ import pydoc import inspect -import types import re import sys @@ -92,7 +91,7 @@ else: argspec = '(...)' - if isinstance(object, types.TupleType): + if isinstance(object, tuple): argspec = object[0] or argspec docstring = object[1] or "" else: Index: SimpleXMLRPCServer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/SimpleXMLRPCServer.py,v retrieving revision 1.2.12.2 retrieving revision 1.2.12.3 diff -u -d -r1.2.12.2 -r1.2.12.3 --- SimpleXMLRPCServer.py 7 Jan 2005 06:58:00 -0000 1.2.12.2 +++ SimpleXMLRPCServer.py 16 Oct 2005 05:23:59 -0000 1.2.12.3 @@ -106,14 +106,22 @@ import sys import os -def resolve_dotted_attribute(obj, attr): +def resolve_dotted_attribute(obj, attr, allow_dotted_names=True): """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d Resolves a dotted attribute name to an object. Raises an AttributeError if any attribute in the chain starts with a '_'. + + If the optional allow_dotted_names argument is false, dots are not + supported and this function operates similar to getattr(obj, attr). 
""" - for i in attr.split('.'): + if allow_dotted_names: + attrs = attr.split('.') + else: + attrs = [attr] + + for i in attrs: if i.startswith('_'): raise AttributeError( 'attempt to access private attribute "%s"' % i @@ -155,14 +163,14 @@ self.funcs = {} self.instance = None - def register_instance(self, instance): + def register_instance(self, instance, allow_dotted_names=False): """Registers an instance to respond to XML-RPC requests. Only one instance can be installed at a time. If the registered instance has a _dispatch method then that method will be called with the name of the XML-RPC method and - it's parameters as a tuple + its parameters as a tuple e.g. instance._dispatch('add',(2,3)) If the registered instance does not have a _dispatch method @@ -173,9 +181,23 @@ If a registered function matches a XML-RPC request, then it will be called instead of the registered instance. + + If the optional allow_dotted_names argument is true and the + instance does not have a _dispatch method, method names + containing dots are supported and resolved, as long as none of + the name segments start with an '_'. + + *** SECURITY WARNING: *** + + Enabling the allow_dotted_names options allows intruders + to access your module's global variables and may allow + intruders to execute arbitrary code on your machine. Only + use this option on a secure, closed network. + """ self.instance = instance + self.allow_dotted_names = allow_dotted_names def register_function(self, function, name = None): """Registers a function to respond to XML-RPC requests. @@ -294,7 +316,8 @@ try: method = resolve_dotted_attribute( self.instance, - method_name + method_name, + self.allow_dotted_names ) except AttributeError: pass @@ -348,7 +371,7 @@ If the registered instance has a _dispatch method then that method will be called with the name of the XML-RPC method and - it's parameters as a tuple + its parameters as a tuple e.g. 
instance._dispatch('add',(2,3)) If the registered instance does not have a _dispatch method @@ -373,7 +396,8 @@ try: func = resolve_dotted_attribute( self.instance, - method + method, + self.allow_dotted_names ) except AttributeError: pass Index: SocketServer.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/SocketServer.py,v retrieving revision 1.31.2.2 retrieving revision 1.31.2.3 diff -u -d -r1.31.2.2 -r1.31.2.3 --- SocketServer.py 7 Jan 2005 06:58:00 -0000 1.31.2.2 +++ SocketServer.py 16 Oct 2005 05:23:59 -0000 1.31.2.3 @@ -50,7 +50,7 @@ unix server classes. Forking and threading versions of each type of server can be created -using the ForkingServer and ThreadingServer mix-in classes. For +using the ForkingMixIn and ThreadingMixIn mix-in classes. For instance, a threading UDP server class is created as follows: class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass @@ -65,8 +65,8 @@ with your request handler class. The request handler class must be different for datagram or stream -services. This can be hidden by using the mix-in request handler -classes StreamRequestHandler or DatagramRequestHandler. +services. This can be hidden by using the request handler +subclasses StreamRequestHandler or DatagramRequestHandler. Of course, you still have to use your head! 
Index: UserDict.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/UserDict.py,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -d -r1.17.2.2 -r1.17.2.3 --- UserDict.py 7 Jan 2005 06:58:00 -0000 1.17.2.2 +++ UserDict.py 16 Oct 2005 05:23:59 -0000 1.17.2.3 @@ -63,12 +63,12 @@ return self.data.popitem() def __contains__(self, key): return key in self.data + @classmethod def fromkeys(cls, iterable, value=None): d = cls() for key in iterable: d[key] = value return d - fromkeys = classmethod(fromkeys) class IterableUserDict(UserDict): def __iter__(self): Index: UserString.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/UserString.py,v retrieving revision 1.13.2.2 retrieving revision 1.13.2.3 diff -u -d -r1.13.2.2 -r1.13.2.3 --- UserString.py 7 Jan 2005 06:58:00 -0000 1.13.2.2 +++ UserString.py 16 Oct 2005 05:23:59 -0000 1.13.2.3 @@ -146,9 +146,13 @@ def __hash__(self): raise TypeError, "unhashable type (it is mutable)" def __setitem__(self, index, sub): + if index < 0: + index += len(self.data) if index < 0 or index >= len(self.data): raise IndexError self.data = self.data[:index] + sub + self.data[index+1:] def __delitem__(self, index): + if index < 0: + index += len(self.data) if index < 0 or index >= len(self.data): raise IndexError self.data = self.data[:index] + self.data[index+1:] def __setslice__(self, start, end, sub): Index: _LWPCookieJar.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/_LWPCookieJar.py,v retrieving revision 1.2.4.1 retrieving revision 1.2.4.2 diff -u -d -r1.2.4.1 -r1.2.4.2 --- _LWPCookieJar.py 7 Jan 2005 06:58:00 -0000 1.2.4.1 +++ _LWPCookieJar.py 16 Oct 2005 05:23:59 -0000 1.2.4.2 @@ -115,13 +115,6 @@ for data in split_header_words([line]): name, value = data[0] - # name and value are an exception here, since a plain "foo" - 
# (with no "=", unlike "bar=foo") means a cookie with no - # name and value "foo". With all other cookie-attributes, - # the situation is reversed: "foo" means an attribute named - # "foo" with no value! - if value is None: - name, value = value, name standard = {} rest = {} for k in boolean_attrs: Index: _MozillaCookieJar.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/_MozillaCookieJar.py,v retrieving revision 1.3.4.1 retrieving revision 1.3.4.2 diff -u -d -r1.3.4.1 -r1.3.4.2 --- _MozillaCookieJar.py 7 Jan 2005 06:58:00 -0000 1.3.4.1 +++ _MozillaCookieJar.py 16 Oct 2005 05:23:59 -0000 1.3.4.2 @@ -73,6 +73,9 @@ secure = (secure == "TRUE") domain_specified = (domain_specified == "TRUE") if name == "": + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas cookielib regards it as a + # cookie with no value. name = value value = None Index: _strptime.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/_strptime.py,v retrieving revision 1.15.4.2 retrieving revision 1.15.4.3 diff -u -d -r1.15.4.2 -r1.15.4.3 --- _strptime.py 7 Jan 2005 06:58:00 -0000 1.15.4.2 +++ _strptime.py 16 Oct 2005 05:23:59 -0000 1.15.4.3 @@ -147,11 +147,14 @@ # strings (e.g., MacOS 9 having timezone as ('','')). if old: current_format = current_format.replace(old, new) + # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since + # 2005-01-03 occurs before the first Monday of the year. Otherwise + # %U is used. 
time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0)) - if time.strftime(directive, time_tuple).find('00'): - U_W = '%U' - else: + if '00' in time.strftime(directive, time_tuple): U_W = '%W' + else: + U_W = '%U' date_time[offset] = current_format.replace('11', U_W) self.LC_date_time = date_time[0] self.LC_date = date_time[1] @@ -272,13 +275,14 @@ def strptime(data_string, format="%a %b %d %H:%M:%S %Y"): """Return a time struct based on the input string and the format string.""" - global _TimeRE_cache + global _TimeRE_cache, _regex_cache _cache_lock.acquire() try: time_re = _TimeRE_cache locale_time = time_re.locale_time if _getlang() != locale_time.lang: _TimeRE_cache = TimeRE() + _regex_cache = {} if len(_regex_cache) > _CACHE_MAX_SIZE: _regex_cache.clear() format_regex = _regex_cache.get(format) Index: asynchat.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/asynchat.py,v retrieving revision 1.19.2.2 retrieving revision 1.19.2.3 diff -u -d -r1.19.2.2 -r1.19.2.3 --- asynchat.py 7 Jan 2005 06:58:00 -0000 1.19.2.2 +++ asynchat.py 16 Oct 2005 05:23:59 -0000 1.19.2.3 @@ -101,11 +101,11 @@ while self.ac_in_buffer: lb = len(self.ac_in_buffer) terminator = self.get_terminator() - if terminator is None or terminator == '': + if not terminator: # no terminator, collect it all self.collect_incoming_data (self.ac_in_buffer) self.ac_in_buffer = '' - elif isinstance(terminator, int): + elif isinstance(terminator, int) or isinstance(terminator, long): # numeric terminator n = terminator if lb < n: Index: asyncore.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/asyncore.py,v retrieving revision 1.32.2.2 retrieving revision 1.32.2.3 diff -u -d -r1.32.2.2 -r1.32.2.3 --- asyncore.py 7 Jan 2005 06:58:00 -0000 1.32.2.2 +++ asyncore.py 16 Oct 2005 05:23:59 -0000 1.32.2.3 @@ -46,7 +46,6 @@ sophisticated high-performance network servers and clients a 
snap. """ -import exceptions import select import socket import sys @@ -61,7 +60,7 @@ except NameError: socket_map = {} -class ExitNow(exceptions.Exception): +class ExitNow(Exception): pass def read(obj): Index: base64.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/base64.py,v retrieving revision 1.13.16.1 retrieving revision 1.13.16.2 diff -u -d -r1.13.16.1 -r1.13.16.2 --- base64.py 7 Jan 2005 06:58:00 -0000 1.13.16.1 +++ base64.py 16 Oct 2005 05:23:59 -0000 1.13.16.2 @@ -221,12 +221,14 @@ acc += _b32rev[c] << shift shift -= 5 if shift < 0: - parts.append(binascii.unhexlify(hex(acc)[2:-1])) + parts.append(binascii.unhexlify('%010x' % acc)) acc = 0 shift = 35 # Process the last, partial quanta - last = binascii.unhexlify(hex(acc)[2:-1]) - if padchars == 1: + last = binascii.unhexlify('%010x' % acc) + if padchars == 0: + last = '' # No characters + elif padchars == 1: last = last[:-1] elif padchars == 3: last = last[:-2] @@ -234,7 +236,7 @@ last = last[:-3] elif padchars == 6: last = last[:-4] - elif padchars <> 0: + else: raise TypeError('Incorrect padding') parts.append(last) return EMPTYSTRING.join(parts) Index: calendar.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/calendar.py,v retrieving revision 1.28.2.2 retrieving revision 1.28.2.3 diff -u -d -r1.28.2.2 -r1.28.2.3 --- calendar.py 7 Jan 2005 06:58:01 -0000 1.28.2.2 +++ calendar.py 16 Oct 2005 05:23:59 -0000 1.28.2.3 @@ -10,7 +10,8 @@ __all__ = ["error","setfirstweekday","firstweekday","isleap", "leapdays","weekday","monthrange","monthcalendar", "prmonth","month","prcal","calendar","timegm", - "month_name", "month_abbr", "day_name", "day_abbr"] + "month_name", "month_abbr", "day_name", "day_abbr", + "weekheader"] # Exception raised for bad input (with string parameter for details) error = ValueError Index: cgi.py 
=================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/cgi.py,v retrieving revision 1.74.2.2 retrieving revision 1.74.2.3 diff -u -d -r1.74.2.2 -r1.74.2.3 --- cgi.py 7 Jan 2005 06:58:01 -0000 1.74.2.2 +++ cgi.py 16 Oct 2005 05:23:59 -0000 1.74.2.3 @@ -237,7 +237,7 @@ Arguments: fp : input file - pdict: dictionary containing other parameters of conten-type header + pdict: dictionary containing other parameters of content-type header Returns a dictionary just like parse_qs(): keys are the field names, each value is a list of values for that field. This is easy to use but not @@ -1039,7 +1039,9 @@ # ========= def escape(s, quote=None): - """Replace special characters '&', '<' and '>' by SGML entities.""" + '''Replace special characters "&", "<" and ">" to HTML-safe sequences. + If the optional flag quote is true, the quotation mark character (") + is also translated.''' s = s.replace("&", "&amp;") # Must be done first! s = s.replace("<", "&lt;") s = s.replace(">", "&gt;") Index: cgitb.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/cgitb.py,v retrieving revision 1.5.2.2 retrieving revision 1.5.2.3 diff -u -d -r1.5.2.2 -r1.5.2.3 --- cgitb.py 7 Jan 2005 06:58:01 -0000 1.5.2.2 +++ cgitb.py 16 Oct 2005 05:23:59 -0000 1.5.2.3 @@ -22,6 +22,7 @@ """ __author__ = 'Ka-Ping Yee' + __version__ = '$Revision$' import sys @@ -112,8 +113,11 @@ frames = [] records = inspect.getinnerframes(etb, context) for frame, file, lnum, func, lines, index in records: - file = file and os.path.abspath(file) or '?' - link = '<a href="file://%s">%s</a>' % (file, pydoc.html.escape(file)) + if file: + file = os.path.abspath(file) + link = '<a href="file://%s">%s</a>' % (file, pydoc.html.escape(file)) + else: + file = link = '?'
args, varargs, varkw, locals = inspect.getargvalues(frame) call = '' if func != '?': @@ -146,7 +150,7 @@ if name in done: continue done[name] = 1 if value is not __UNDEF__: - if where in ['global', 'builtin']: + if where in ('global', 'builtin'): name = ('%s ' % where) + strong(name) elif where == 'local': name = strong(name) Index: codecs.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v retrieving revision 1.26.2.2 retrieving revision 1.26.2.3 diff -u -d -r1.26.2.2 -r1.26.2.3 --- codecs.py 7 Jan 2005 06:58:01 -0000 1.26.2.2 +++ codecs.py 16 Oct 2005 05:23:59 -0000 1.26.2.3 @@ -229,13 +229,15 @@ self.stream = stream self.errors = errors self.bytebuffer = "" - self.charbuffer = u"" - self.atcr = False + # For str->str decoding this will stay a str + # For str->unicode decoding the first read will promote it to unicode + self.charbuffer = "" + self.linebuffer = None def decode(self, input, errors='strict'): raise NotImplementedError - def read(self, size=-1, chars=-1): + def read(self, size=-1, chars=-1, firstline=False): """ Decodes data from the stream self.stream and returns the resulting object. @@ -252,12 +254,22 @@ is intended to prevent having to decode huge files in one step. + If firstline is true, and a UnicodeDecodeError happens + after the first line terminator in the input only the first line + will be returned, the rest of the input will be kept until the + next call to read(). + The method should use a greedy read strategy meaning that it should read as much data as is allowed within the definition of the encoding and the given size, e.g. if optional encoding endings or state markers are available on the stream, these should be read too. 
""" + # If we have lines cached, first merge them back into characters + if self.linebuffer: + self.charbuffer = "".join(self.linebuffer) + self.linebuffer = None + # read until we get the required number of characters (if available) while True: # can the request can be satisfied from the character buffer? @@ -274,7 +286,16 @@ newdata = self.stream.read(size) # decode bytes (those remaining from the last call included) data = self.bytebuffer + newdata - newchars, decodedbytes = self.decode(data, self.errors) + try: + newchars, decodedbytes = self.decode(data, self.errors) + except UnicodeDecodeError, exc: + if firstline: + newchars, decodedbytes = self.decode(data[:exc.start], self.errors) + lines = newchars.splitlines(True) + if len(lines)<=1: + raise + else: + raise # keep undecoded bytes until the next call self.bytebuffer = data[decodedbytes:] # put new characters in the character buffer @@ -285,7 +306,7 @@ if chars < 0: # Return everything we've got result = self.charbuffer - self.charbuffer = u"" + self.charbuffer = "" else: # Return the first chars characters result = self.charbuffer[:chars] @@ -301,30 +322,63 @@ read() method. """ + # If we have lines cached from an earlier read, return + # them unconditionally + if self.linebuffer: + line = self.linebuffer[0] + del self.linebuffer[0] + if len(self.linebuffer) == 1: + # revert to charbuffer mode; we might need more data + # next time + self.charbuffer = self.linebuffer[0] + self.linebuffer = None + if not keepends: + line = line.splitlines(False)[0] + return line + readsize = size or 72 - line = u"" + line = "" # If size is given, we call read() only once while True: - data = self.read(readsize) - if self.atcr and data.startswith(u"\n"): - data = data[1:] + data = self.read(readsize, firstline=True) if data: - self.atcr = data.endswith(u"\r") + # If we're at a "\r" read one extra character (which might + # be a "\n") to get a proper line ending. 
If the stream is + # temporarily exhausted we return the wrong line ending. + if data.endswith("\r"): + data += self.read(size=1, chars=1) + line += data lines = line.splitlines(True) if lines: + if len(lines) > 1: + # More than one line result; the first line is a full line + # to return + line = lines[0] + del lines[0] + if len(lines) > 1: + # cache the remaining lines + lines[-1] += self.charbuffer + self.linebuffer = lines + self.charbuffer = None + else: + # only one remaining line, put it back into charbuffer + self.charbuffer = lines[0] + self.charbuffer + if not keepends: + line = line.splitlines(False)[0] + break line0withend = lines[0] line0withoutend = lines[0].splitlines(False)[0] if line0withend != line0withoutend: # We really have a line end # Put the rest back together and keep it until the next call - self.charbuffer = u"".join(lines[1:]) + self.charbuffer + self.charbuffer = "".join(lines[1:]) + self.charbuffer if keepends: line = line0withend else: line = line0withoutend - break + break # we didn't get anything or this was our only try - elif not data or size is not None: + if not data or size is not None: if line and not keepends: line = line.splitlines(False)[0] break @@ -356,7 +410,17 @@ from decoding errors. """ - pass + self.bytebuffer = "" + self.charbuffer = u"" + self.linebuffer = None + + def seek(self, offset, whence=0): + """ Set the input stream's current position. + + Resets the codec buffers used for keeping state. + """ + self.reset() + self.stream.seek(offset, whence) def next(self): @@ -529,7 +593,9 @@ def next(self): """ Return the next decoded line from the input stream.""" - return self.reader.next() + data = self.reader.next() + data, bytesencoded = self.encode(data, self.errors) + return data def __iter__(self): return self @@ -566,7 +632,7 @@ Note: The wrapped version will only accept the object format defined by the codecs, i.e. Unicode objects for most builtin - codecs. 
Output is also codec dependent and will usually by + codecs. Output is also codec dependent and will usually be Unicode as well. Files are always opened in binary mode, even if no binary mode @@ -720,11 +786,19 @@ ### error handlers -strict_errors = lookup_error("strict") -ignore_errors = lookup_error("ignore") -replace_errors = lookup_error("replace") -xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") -backslashreplace_errors = lookup_error("backslashreplace") +try: + strict_errors = lookup_error("strict") + ignore_errors = lookup_error("ignore") + replace_errors = lookup_error("replace") + xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") + backslashreplace_errors = lookup_error("backslashreplace") +except LookupError: + # In --disable-unicode builds, these error handler are missing + strict_errors = None + ignore_errors = None + replace_errors = None + xmlcharrefreplace_errors = None + backslashreplace_errors = None # Tell modulefinder that using codecs probably needs the encodings # package Index: cookielib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/cookielib.py,v retrieving revision 1.4.4.1 retrieving revision 1.4.4.2 diff -u -d -r1.4.4.1 -r1.4.4.2 --- cookielib.py 7 Jan 2005 06:58:01 -0000 1.4.4.1 +++ cookielib.py 16 Oct 2005 05:23:59 -0000 1.4.4.2 @@ -26,7 +26,6 @@ """ import sys, re, urlparse, copy, time, urllib, logging -from types import StringTypes try: import threading as _threading except ImportError: @@ -359,7 +358,7 @@ [[('Basic', None), ('realm', '"foobar"')]] """ - assert type(header_values) not in StringTypes + assert not isinstance(header_values, basestring) result = [] for text in header_values: orig_text = text @@ -448,19 +447,15 @@ for ns_header in ns_headers: pairs = [] version_set = False - for param in re.split(r";\s*", ns_header): + for ii, param in enumerate(re.split(r";\s*", ns_header)): param = param.rstrip() if param == "": continue if "=" 
not in param: - if param.lower() in known_attrs: - k, v = param, None - else: - # cookie with missing value - k, v = param, None + k, v = param, None else: k, v = re.split(r"\s*=\s*", param, 1) k = k.lstrip() - if k is not None: + if ii != 0: lc = k.lower() if lc in known_attrs: k = lc @@ -783,12 +778,12 @@ def __repr__(self): args = [] - for name in ["version", "name", "value", + for name in ("version", "name", "value", "port", "port_specified", "domain", "domain_specified", "domain_initial_dot", "path", "path_specified", "secure", "expires", "discard", "comment", "comment_url", - ]: + ): attr = getattr(self, name) args.append("%s=%s" % (name, repr(attr))) args.append("rest=%s" % repr(self._rest)) @@ -981,9 +976,9 @@ if j == 0: # domain like .foo.bar tld = domain[i+1:] sld = domain[j+1:i] - if (sld.lower() in [ + if (sld.lower() in ( "co", "ac", - "com", "edu", "org", "net", "gov", "mil", "int"] and + "com", "edu", "org", "net", "gov", "mil", "int") and len(tld) == 2): # domain like .co.uk debug(" country-code second level domain %s", domain) @@ -1134,11 +1129,10 @@ # having to load lots of MSIE cookie files unless necessary. 
req_host, erhn = eff_request_host(request) if not req_host.startswith("."): - dotted_req_host = "."+req_host + req_host = "."+req_host if not erhn.startswith("."): - dotted_erhn = "."+erhn - if not (dotted_req_host.endswith(domain) or - dotted_erhn.endswith(domain)): + erhn = "."+erhn + if not (req_host.endswith(domain) or erhn.endswith(domain)): #debug(" request domain %s does not match cookie domain %s", # req_host, domain) return False @@ -1416,7 +1410,7 @@ v = self._now + v if (k in value_attrs) or (k in boolean_attrs): if (v is None and - k not in ["port", "comment", "commenturl"]): + k not in ("port", "comment", "commenturl")): debug(" missing value for %s attribute" % k) bad_cookie = True break Index: copy.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/copy.py,v retrieving revision 1.28.2.2 retrieving revision 1.28.2.3 diff -u -d -r1.28.2.2 -r1.28.2.3 --- copy.py 7 Jan 2005 06:58:01 -0000 1.28.2.2 +++ copy.py 16 Oct 2005 05:23:59 -0000 1.28.2.3 @@ -14,7 +14,7 @@ class instances). - A shallow copy constructs a new compound object and then (to the - extent possible) inserts *the same objects* into in that the + extent possible) inserts *the same objects* into it that the original contains. 
- A deep copy constructs a new compound object and then, recursively, @@ -99,7 +99,7 @@ def _copy_immutable(x): return x -for t in (types.NoneType, int, long, float, bool, str, tuple, +for t in (type(None), int, long, float, bool, str, tuple, frozenset, type, xrange, types.ClassType, types.BuiltinFunctionType): d[t] = _copy_immutable @@ -195,26 +195,26 @@ def _deepcopy_atomic(x, memo): return x -d[types.NoneType] = _deepcopy_atomic -d[types.IntType] = _deepcopy_atomic -d[types.LongType] = _deepcopy_atomic -d[types.FloatType] = _deepcopy_atomic -d[types.BooleanType] = _deepcopy_atomic +d[type(None)] = _deepcopy_atomic +d[int] = _deepcopy_atomic +d[long] = _deepcopy_atomic +d[float] = _deepcopy_atomic +d[bool] = _deepcopy_atomic try: - d[types.ComplexType] = _deepcopy_atomic -except AttributeError: + d[complex] = _deepcopy_atomic +except NameError: pass -d[types.StringType] = _deepcopy_atomic +d[str] = _deepcopy_atomic try: - d[types.UnicodeType] = _deepcopy_atomic -except AttributeError: + d[unicode] = _deepcopy_atomic +except NameError: pass try: d[types.CodeType] = _deepcopy_atomic except AttributeError: pass -d[types.TypeType] = _deepcopy_atomic -d[types.XRangeType] = _deepcopy_atomic +d[type] = _deepcopy_atomic +d[xrange] = _deepcopy_atomic d[types.ClassType] = _deepcopy_atomic d[types.BuiltinFunctionType] = _deepcopy_atomic @@ -224,7 +224,7 @@ for a in x: y.append(deepcopy(a, memo)) return y -d[types.ListType] = _deepcopy_list +d[list] = _deepcopy_list def _deepcopy_tuple(x, memo): y = [] @@ -243,7 +243,7 @@ y = x memo[d] = y return y -d[types.TupleType] = _deepcopy_tuple +d[tuple] = _deepcopy_tuple def _deepcopy_dict(x, memo): y = {} @@ -251,7 +251,7 @@ for key, value in x.iteritems(): y[deepcopy(key, memo)] = deepcopy(value, memo) return y -d[types.DictionaryType] = _deepcopy_dict +d[dict] = _deepcopy_dict if PyStringMap is not None: d[PyStringMap] = _deepcopy_dict Index: csv.py =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Lib/csv.py,v retrieving revision 1.12.2.1 retrieving revision 1.12.2.2 diff -u -d -r1.12.2.1 -r1.12.2.2 --- csv.py 7 Jan 2005 06:58:02 -0000 1.12.2.1 +++ csv.py 16 Oct 2005 05:23:59 -0000 1.12.2.2 @@ -6,8 +6,10 @@ import re from _csv import Error, __version__, writer, reader, register_dialect, \ unregister_dialect, get_dialect, list_dialects, \ + field_size_limit, \ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ __doc__ +from _csv import Dialect as _Dialect try: from cStringIO import StringIO @@ -41,48 +43,14 @@ def __init__(self): if self.__class__ != Dialect: self._valid = True - errors = self._validate() - if errors != []: - raise Error, "Dialect did not validate: %s" % ", ".join(errors) + self._validate() def _validate(self): - errors = [] - if not self._valid: - errors.append("can't directly instantiate Dialect class") - - if self.delimiter is None: - errors.append("delimiter character not set") - elif (not isinstance(self.delimiter, str) or - len(self.delimiter) > 1): - errors.append("delimiter must be one-character string") - - if self.quotechar is None: - if self.quoting != QUOTE_NONE: - errors.append("quotechar not set") - elif (not isinstance(self.quotechar, str) or - len(self.quotechar) > 1): - errors.append("quotechar must be one-character string") - - if self.lineterminator is None: - errors.append("lineterminator not set") - elif not isinstance(self.lineterminator, str): - errors.append("lineterminator must be a string") - - if self.doublequote not in (True, False): - errors.append("doublequote parameter must be True or False") - - if self.skipinitialspace not in (True, False): - errors.append("skipinitialspace parameter must be True or False") - - if self.quoting is None: - errors.append("quoting parameter not set") - - if self.quoting is QUOTE_NONE: - if (not isinstance(self.escapechar, (unicode, str)) or - len(self.escapechar) > 1): - errors.append("escapechar must be a one-character string or unicode 
object") - - return errors + try: + _Dialect(self) + except TypeError, e: + # We do this for compatibility with py2.3 + raise Error(str(e)) class excel(Dialect): """Describe the usual properties of Excel-generated CSV files.""" Index: decimal.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/decimal.py,v retrieving revision 1.32.2.1 retrieving revision 1.32.2.2 diff -u -d -r1.32.2.1 -r1.32.2.2 --- decimal.py 7 Jan 2005 06:58:02 -0000 1.32.2.1 +++ decimal.py 16 Oct 2005 05:23:59 -0000 1.32.2.2 @@ -134,7 +134,7 @@ 'setcontext', 'getcontext' ] -import copy +import copy as _copy #Rounding ROUND_DOWN = 'ROUND_DOWN' @@ -515,7 +515,7 @@ if isinstance(value, (list,tuple)): if len(value) != 3: raise ValueError, 'Invalid arguments' - if value[0] not in [0,1]: + if value[0] not in (0,1): raise ValueError, 'Invalid sign' for digit in value[1]: if not isinstance(digit, (int,long)) or digit < 0: @@ -645,6 +645,8 @@ def __cmp__(self, other, context=None): other = _convert_other(other) + if other is NotImplemented: + return other if self._is_special or other._is_special: ans = self._check_nans(other, context) @@ -696,12 +698,12 @@ def __eq__(self, other): if not isinstance(other, (Decimal, int, long)): - return False + return NotImplemented return self.__cmp__(other) == 0 def __ne__(self, other): if not isinstance(other, (Decimal, int, long)): - return True + return NotImplemented return self.__cmp__(other) != 0 def compare(self, other, context=None): @@ -714,6 +716,8 @@ Like __cmp__, but returns Decimal instances. 
""" other = _convert_other(other) + if other is NotImplemented: + return other #compare(NaN, NaN) = NaN if (self._is_special or other and other._is_special): @@ -728,6 +732,10 @@ # Decimal integers must hash the same as the ints # Non-integer decimals are normalized and hashed as strings # Normalization assures that hast(100E-1) == hash(10) + if self._is_special: + if self._isnan(): + raise TypeError('Cannot hash a NaN value.') + return hash(str(self)) i = int(self) if self == Decimal(i): return hash(i) @@ -752,18 +760,19 @@ Captures all of the information in the underlying representation. """ - if self._isnan(): - minus = '-'*self._sign - if self._int == (0,): - info = '' - else: - info = ''.join(map(str, self._int)) - if self._isnan() == 2: - return minus + 'sNaN' + info - return minus + 'NaN' + info - if self._isinfinity(): - minus = '-'*self._sign - return minus + 'Infinity' + if self._is_special: + if self._isnan(): + minus = '-'*self._sign + if self._int == (0,): + info = '' + else: + info = ''.join(map(str, self._int)) + if self._isnan() == 2: + return minus + 'sNaN' + info + return minus + 'NaN' + info + if self._isinfinity(): + minus = '-'*self._sign + return minus + 'Infinity' if context is None: context = getcontext() @@ -915,6 +924,8 @@ -INF + INF (or the reverse) cause InvalidOperation errors. """ other = _convert_other(other) + if other is NotImplemented: + return other if context is None: context = getcontext() @@ -1002,6 +1013,8 @@ def __sub__(self, other, context=None): """Return self + (-other)""" other = _convert_other(other) + if other is NotImplemented: + return other if self._is_special or other._is_special: ans = self._check_nans(other, context=context) @@ -1019,6 +1032,8 @@ def __rsub__(self, other, context=None): """Return other + (-self)""" other = _convert_other(other) + if other is NotImplemented: + return other tmp = Decimal(self) tmp._sign = 1 - tmp._sign @@ -1064,6 +1079,8 @@ (+-) INF * 0 (or its reverse) raise InvalidOperation. 
""" other = _convert_other(other) + if other is NotImplemented: + return other if context is None: context = getcontext() @@ -1136,6 +1153,10 @@ computing the other value are not raised. """ other = _convert_other(other) + if other is NotImplemented: + if divmod in (0, 1): + return NotImplemented + return (NotImplemented, NotImplemented) if context is None: context = getcontext() @@ -1288,6 +1309,8 @@ def __rdiv__(self, other, context=None): """Swaps self/other and returns __div__.""" other = _convert_other(other) + if other is NotImplemented: + return other return other.__div__(self, context=context) __rtruediv__ = __rdiv__ @@ -1300,6 +1323,8 @@ def __rdivmod__(self, other, context=None): """Swaps self/other and returns __divmod__.""" other = _convert_other(other) + if other is NotImplemented: + return other return other.__divmod__(self, context=context) def __mod__(self, other, context=None): @@ -1307,6 +1332,8 @@ self % other """ other = _convert_other(other) + if other is NotImplemented: + return other if self._is_special or other._is_special: ans = self._check_nans(other, context) @@ -1321,6 +1348,8 @@ def __rmod__(self, other, context=None): """Swaps self/other and returns __mod__.""" other = _convert_other(other) + if other is NotImplemented: + return other return other.__mod__(self, context=context) def remainder_near(self, other, context=None): @@ -1328,6 +1357,8 @@ Remainder nearest to 0- abs(remainder-near) <= other/2 """ other = _convert_other(other) + if other is NotImplemented: + return other if self._is_special or other._is_special: ans = self._check_nans(other, context) @@ -1407,6 +1438,8 @@ def __rfloordiv__(self, other, context=None): """Swaps self/other and returns __floordiv__.""" other = _convert_other(other) + if other is NotImplemented: + return other return other.__floordiv__(self, context=context) def __float__(self): @@ -1414,7 +1447,7 @@ return float(str(self)) def __int__(self): - """Converts self to a int, truncating if necessary.""" + 
"""Converts self to an int, truncating if necessary.""" if self._is_special: if self._isnan(): context = getcontext() @@ -1657,6 +1690,8 @@ If modulo is None (default), don't take it mod modulo. """ n = _convert_other(n) + if n is NotImplemented: + return n if context is None: context = getcontext() @@ -1743,6 +1778,8 @@ def __rpow__(self, other, context=None): """Swaps self/other and returns __pow__.""" other = _convert_other(other) + if other is NotImplemented: + return other return other.__pow__(self, context=context) def normalize(self, context=None): @@ -1997,6 +2034,8 @@ NaN (and signals if one is sNaN). Also rounds. """ other = _convert_other(other) + if other is NotImplemented: + return other if self._is_special or other._is_special: # if one operand is a quiet NaN and the other is number, then the @@ -2044,6 +2083,8 @@ NaN (and signals if one is sNaN). Also rounds. """ other = _convert_other(other) + if other is NotImplemented: + return other if self._is_special or other._is_special: # if one operand is a quiet NaN and the other is number, then the @@ -2170,7 +2211,7 @@ del s for name, val in locals().items(): if val is None: - setattr(self, name, copy.copy(getattr(DefaultContext, name))) + setattr(self, name, _copy.copy(getattr(DefaultContext, name))) else: setattr(self, name, val) del self.self @@ -2714,7 +2755,7 @@ return a.sqrt(context=self) def subtract(self, a, b): - """Return the sum of the two operands. + """Return the difference between the two operands. >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('1.07')) Decimal("0.23") @@ -2870,8 +2911,7 @@ return other if isinstance(other, (int, long)): return Decimal(other) - - raise TypeError, "You can interact Decimal only with int, long or Decimal data types." 
+ return NotImplemented _infinity_map = { 'inf' : 1, Index: difflib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/difflib.py,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- difflib.py 7 Jan 2005 06:58:02 -0000 1.10.2.2 +++ difflib.py 16 Oct 2005 05:23:59 -0000 1.10.2.3 @@ -1371,7 +1371,7 @@ text = ' ' # insert marks that won't be noticed by an xml/html escaper. text = '\0' + format_key + text + '\1' - # Return line of text, first allow user's line formatter to do it's + # Return line of text, first allow user's line formatter to do its # thing (such as adding the line number) then replace the special # marks with what the user's change markup. return (num_lines[side],text) @@ -1472,7 +1472,7 @@ """Yields from/to lines of text with a change indication. This function is an iterator. It itself pulls lines from the line - iterator. It's difference from that iterator is that this function + iterator. Its difference from that iterator is that this function always yields a pair of from/to text lines (with the change indication). If necessary it will collect single from/to lines until it has a matching pair from/to pair to yield. Index: doctest.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/doctest.py,v retrieving revision 1.24.2.2 retrieving revision 1.24.2.3 diff -u -d -r1.24.2.2 -r1.24.2.3 --- doctest.py 7 Jan 2005 06:58:03 -0000 1.24.2.2 +++ doctest.py 16 Oct 2005 05:23:59 -0000 1.24.2.3 @@ -2071,24 +2071,24 @@ The old flag is returned so that a runner could restore the old value if it wished to: - >>> old = _unittest_reportflags - >>> set_unittest_reportflags(REPORT_NDIFF | + >>> import doctest + >>> old = doctest._unittest_reportflags + >>> doctest.set_unittest_reportflags(REPORT_NDIFF | ... 
REPORT_ONLY_FIRST_FAILURE) == old True - >>> import doctest >>> doctest._unittest_reportflags == (REPORT_NDIFF | ... REPORT_ONLY_FIRST_FAILURE) True Only reporting flags can be set: - >>> set_unittest_reportflags(ELLIPSIS) + >>> doctest.set_unittest_reportflags(ELLIPSIS) Traceback (most recent call last): ... ValueError: ('Only reporting flags allowed', 8) - >>> set_unittest_reportflags(old) == (REPORT_NDIFF | + >>> doctest.set_unittest_reportflags(old) == (REPORT_NDIFF | ... REPORT_ONLY_FIRST_FAILURE) True """ @@ -2476,6 +2476,7 @@ blah # # Ho hum + """ output = [] for piece in DocTestParser().parse(s): @@ -2498,7 +2499,8 @@ while output and output[0] == '#': output.pop(0) # Combine the output, and return it. - return '\n'.join(output) + # Add a courtesy newline to prevent exec from choking (see bug #1172785) + return '\n'.join(output) + '\n' def testsource(module, name): """Extract the test sources from a doctest docstring as a script. Index: dumbdbm.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/dumbdbm.py,v retrieving revision 1.19.2.2 retrieving revision 1.19.2.3 diff -u -d -r1.19.2.2 -r1.19.2.3 --- dumbdbm.py 7 Jan 2005 06:58:03 -0000 1.19.2.2 +++ dumbdbm.py 16 Oct 2005 05:23:59 -0000 1.19.2.3 @@ -81,6 +81,7 @@ pass else: for line in f: + line = line.rstrip() key, pos_and_siz_pair = eval(line) self._index[key] = pos_and_siz_pair f.close() Index: ftplib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/ftplib.py,v retrieving revision 1.72.2.1 retrieving revision 1.72.2.2 diff -u -d -r1.72.2.1 -r1.72.2.2 --- ftplib.py 7 Jan 2005 06:58:03 -0000 1.72.2.1 +++ ftplib.py 16 Oct 2005 05:23:59 -0000 1.72.2.2 @@ -208,13 +208,13 @@ if self.debugging: print '*resp*', self.sanitize(resp) self.lastresp = resp[:3] c = resp[:1] + if c in ('1', '2', '3'): + return resp if c == '4': raise error_temp, resp if c == '5': raise error_perm, resp - 
if c not in '123': - raise error_proto, resp - return resp + raise error_proto, resp def voidresp(self): """Expect a response beginning with '2'.""" @@ -582,17 +582,17 @@ Raises error_proto if it does not contain '(|||port|)' Return ('host.addr.as.numbers', port#) tuple.''' - if resp[:3] <> '229': + if resp[:3] != '229': raise error_reply, resp left = resp.find('(') if left < 0: raise error_proto, resp right = resp.find(')', left + 1) if right < 0: raise error_proto, resp # should contain '(|||port|)' - if resp[left + 1] <> resp[right - 1]: + if resp[left + 1] != resp[right - 1]: raise error_proto, resp parts = resp[left + 1:right].split(resp[left+1]) - if len(parts) <> 5: + if len(parts) != 5: raise error_proto, resp host = peer[0] port = int(parts[3]) @@ -755,7 +755,16 @@ def test(): '''Test program. - Usage: ftp [-d] [-r[file]] host [-l[dir]] [-d[dir]] [-p] [file] ...''' + Usage: ftp [-d] [-r[file]] host [-l[dir]] [-d[dir]] [-p] [file] ... + + -d dir + -l list + -p password + ''' + + if len(sys.argv) < 2: + print test.__doc__ + sys.exit(0) debugging = 0 rcfile = None Index: glob.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/glob.py,v retrieving revision 1.10.20.1 retrieving revision 1.10.20.2 diff -u -d -r1.10.20.1 -r1.10.20.2 --- glob.py 7 Jan 2005 06:58:04 -0000 1.10.20.1 +++ glob.py 16 Oct 2005 05:23:59 -0000 1.10.20.2 @@ -4,7 +4,7 @@ import fnmatch import re -__all__ = ["glob"] +__all__ = ["glob", "iglob"] def glob(pathname): """Return a list of paths matching a pathname pattern. @@ -12,35 +12,42 @@ The pattern may contain simple shell-style wildcards a la fnmatch. """ + return list(iglob(pathname)) + +def iglob(pathname): + """Return a list of paths matching a pathname pattern. + + The pattern may contain simple shell-style wildcards a la fnmatch. 
+ + """ if not has_magic(pathname): if os.path.lexists(pathname): - return [pathname] - else: - return [] + yield pathname + return dirname, basename = os.path.split(pathname) if not dirname: - return glob1(os.curdir, basename) - elif has_magic(dirname): - list = glob(dirname) + for name in glob1(os.curdir, basename): + yield name + return + if has_magic(dirname): + dirs = iglob(dirname) else: - list = [dirname] - if not has_magic(basename): - result = [] - for dirname in list: - if basename or os.path.isdir(dirname): - name = os.path.join(dirname, basename) - if os.path.lexists(name): - result.append(name) + dirs = [dirname] + if has_magic(basename): + glob_in_dir = glob1 else: - result = [] - for dirname in list: - sublist = glob1(dirname, basename) - for name in sublist: - result.append(os.path.join(dirname, name)) - return result + glob_in_dir = glob0 + for dirname in dirs: + for name in glob_in_dir(dirname, basename): + yield os.path.join(dirname, name) + +# These 2 helper functions non-recursively glob inside a literal directory. +# They return a list of basenames. `glob1` accepts a pattern while `glob0` +# takes a literal basename (so it only has to check for its existence). def glob1(dirname, pattern): - if not dirname: dirname = os.curdir + if not dirname: + dirname = os.curdir try: names = os.listdir(dirname) except os.error: @@ -49,6 +56,17 @@ names=filter(lambda x: x[0]!='.',names) return fnmatch.filter(names,pattern) +def glob0(dirname, basename): + if basename == '': + # `os.path.split()` returns an empty basename for paths ending with a + # directory separator. 'q*x/' should match only directories. 
+ if os.isdir(dirname): + return [basename] + else: + if os.path.lexists(os.path.join(dirname, basename)): + return [basename] + return [] + magic_check = re.compile('[*?[]') Index: gzip.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/gzip.py,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- gzip.py 7 Jan 2005 06:58:04 -0000 1.34.2.2 +++ gzip.py 16 Oct 2005 05:23:59 -0000 1.34.2.3 @@ -55,6 +55,7 @@ """ myfileobj = None + max_read_chunk = 10 * 1024 * 1024 # 10Mb def __init__(self, filename=None, mode=None, compresslevel=9, fileobj=None): @@ -215,14 +216,14 @@ try: while True: self._read(readsize) - readsize = readsize * 2 + readsize = min(self.max_read_chunk, readsize * 2) except EOFError: size = self.extrasize else: # just get some more of it try: while size > self.extrasize: self._read(readsize) - readsize = readsize * 2 + readsize = min(self.max_read_chunk, readsize * 2) except EOFError: if size > self.extrasize: size = self.extrasize @@ -331,7 +332,10 @@ return self.close() - def flush(self): + def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): + if self.mode == WRITE: + # Ensure the compressor's buffer is flushed + self.fileobj.write(self.compress.flush(zlib_mode)) self.fileobj.flush() def fileno(self): Index: hmac.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/hmac.py,v retrieving revision 1.7.2.1 retrieving revision 1.7.2.2 diff -u -d -r1.7.2.1 -r1.7.2.2 --- hmac.py 7 Jan 2005 06:58:04 -0000 1.7.2.1 +++ hmac.py 16 Oct 2005 05:23:59 -0000 1.7.2.2 @@ -28,27 +28,33 @@ key: key for the keyed hash object. msg: Initial input for the hash, if provided. - digestmod: A module supporting PEP 247. Defaults to the md5 module. + digestmod: A module supporting PEP 247. *OR* + A hashlib constructor returning a new hash object. + Defaults to hashlib.md5. 
""" if key is _secret_backdoor_key: # cheap return if digestmod is None: - import md5 - digestmod = md5 + import hashlib + digestmod = hashlib.md5 - self.digestmod = digestmod - self.outer = digestmod.new() - self.inner = digestmod.new() - self.digest_size = digestmod.digest_size + if callable(digestmod): + self.digest_cons = digestmod + else: + self.digest_cons = lambda d='': digestmod.new(d) + + self.outer = self.digest_cons() + self.inner = self.digest_cons() + self.digest_size = self.inner.digest_size blocksize = 64 ipad = "\x36" * blocksize opad = "\x5C" * blocksize if len(key) > blocksize: - key = digestmod.new(key).digest() + key = self.digest_cons(key).digest() key = key + chr(0) * (blocksize - len(key)) self.outer.update(_strxor(key, opad)) @@ -70,7 +76,7 @@ An update to this copy won't affect the original object. """ other = HMAC(_secret_backdoor_key) - other.digestmod = self.digestmod + other.digest_cons = self.digest_cons other.digest_size = self.digest_size other.inner = self.inner.copy() other.outer = self.outer.copy() Index: httplib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/httplib.py,v retrieving revision 1.54.2.2 retrieving revision 1.54.2.3 diff -u -d -r1.54.2.2 -r1.54.2.3 --- httplib.py 7 Jan 2005 06:58:04 -0000 1.54.2.2 +++ httplib.py 16 Oct 2005 05:23:59 -0000 1.54.2.3 @@ -153,6 +153,9 @@ INSUFFICIENT_STORAGE = 507 NOT_EXTENDED = 510 +# maximal amount of data to read at one time in _safe_read +MAXAMOUNT = 1048576 + class HTTPMessage(mimetools.Message): def addheader(self, key, value): @@ -353,6 +356,7 @@ raise UnknownProtocol(version) if self.version == 9: + self.length = None self.chunked = 0 self.will_close = 1 self.msg = HTTPMessage(StringIO()) @@ -540,14 +544,14 @@ reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. 
""" - s = '' + s = [] while amt > 0: - chunk = self.fp.read(amt) + chunk = self.fp.read(min(amt, MAXAMOUNT)) if not chunk: raise IncompleteRead(s) - s += chunk + s.append(chunk) amt -= len(chunk) - return s + return ''.join(s) def getheader(self, name, default=None): if self.msg is None: Index: imaplib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/imaplib.py,v retrieving revision 1.54.2.2 retrieving revision 1.54.2.3 diff -u -d -r1.54.2.2 -r1.54.2.3 --- imaplib.py 7 Jan 2005 06:58:04 -0000 1.54.2.2 +++ imaplib.py 16 Oct 2005 05:23:59 -0000 1.54.2.3 @@ -18,8 +18,9 @@ # IMAP4_SSL contributed by Tino Lange March 2002. # GET/SETQUOTA contributed by Andreas Zeidler June 2002. # PROXYAUTH contributed by Rick Holbert November 2002. +# GET/SETANNOTATION contributed by Tomas Lindroos June 2005. -__version__ = "2.55" +__version__ = "2.58" import binascii, os, random, re, socket, sys, time @@ -51,6 +52,7 @@ 'EXPUNGE': ('SELECTED',), 'FETCH': ('SELECTED',), 'GETACL': ('AUTH', 'SELECTED'), + 'GETANNOTATION':('AUTH', 'SELECTED'), 'GETQUOTA': ('AUTH', 'SELECTED'), 'GETQUOTAROOT': ('AUTH', 'SELECTED'), 'MYRIGHTS': ('AUTH', 'SELECTED'), @@ -66,6 +68,7 @@ 'SEARCH': ('SELECTED',), 'SELECT': ('AUTH', 'SELECTED'), 'SETACL': ('AUTH', 'SELECTED'), + 'SETANNOTATION':('AUTH', 'SELECTED'), 'SETQUOTA': ('AUTH', 'SELECTED'), 'SORT': ('SELECTED',), 'STATUS': ('AUTH', 'SELECTED'), @@ -81,7 +84,7 @@ Continuation = re.compile(r'\+( (?P<data>.*))?') Flags = re.compile(r'.*FLAGS \((?P<flags>[^\)]*)\)') InternalDate = re.compile(r'.*INTERNALDATE "' - r'(?P<day>[ 123][0-9])-(?P<mon>[A-Z][a-z][a-z])-(?P<year>[0-9][0-9][0-9][0-9])' + r'(?P<day>[ 0123][0-9])-(?P<mon>[A-Z][a-z][a-z])-(?P<year>[0-9][0-9][0-9][0-9])' r' (?P<hour>[0-9][0-9]):(?P<min>[0-9][0-9]):(?P<sec>[0-9][0-9])' r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])' r'"') @@ -133,10 +136,10 @@ the command re-tried. "readonly" exceptions imply the command should be re-tried.
- Note: to use this module, you must read the RFCs pertaining - to the IMAP4 protocol, as the semantics of the arguments to - each IMAP4 command are left to the invoker, not to mention - the results. + Note: to use this module, you must read the RFCs pertaining to the + IMAP4 protocol, as the semantics of the arguments to each IMAP4 + command are left to the invoker, not to mention the results. Also, + most IMAP servers implement a sub-set of the commands available here. """ class error(Exception): pass # Logical errors - debug required @@ -152,7 +155,7 @@ self.tagged_commands = {} # Tagged commands awaiting response self.untagged_responses = {} # {typ: [data, ...], ...} self.continuation_response = '' # Last continuation response - self.is_readonly = None # READ-ONLY desired state + self.is_readonly = False # READ-ONLY desired state self.tagnum = 0 # Open socket to server. @@ -162,7 +165,7 @@ # Create unique tag for this session, # and compile tagged response matcher. - self.tagpre = Int2AP(random.randint(0, 31999)) + self.tagpre = Int2AP(random.randint(4096, 65535)) self.tagre = re.compile(r'(?P<tag>' + self.tagpre + r'\d+) (?P<type>[A-Z]+) (?P<data>.*)') @@ -186,11 +189,10 @@ else: raise self.error(self.welcome) - cap = 'CAPABILITY' - self._simple_command(cap) - if not cap in self.untagged_responses: + typ, dat = self.capability() + if dat == [None]: raise self.error('no CAPABILITY response from server') - self.capabilities = tuple(self.untagged_responses[cap][-1].upper().split()) + self.capabilities = tuple(dat[-1].upper().split()) if __debug__: if self.debug >= 3: @@ -345,6 +347,15 @@ return typ, dat + def capability(self): + """(typ, [data]) = <instance>.capability() + Fetch capabilities list from server.""" + + name = 'CAPABILITY' + typ, dat = self._simple_command(name) + return self._untagged_response(typ, dat, name) + + def check(self): """Checkpoint mailbox on server.
@@ -436,6 +447,14 @@ return self._untagged_response(typ, dat, 'ACL') + def getannotation(self, mailbox, entry, attribute): + """(typ, [data]) = <instance>.getannotation(mailbox, entry, attribute) + Retrieve ANNOTATIONs.""" + + typ, dat = self._simple_command('GETANNOTATION', mailbox, entry, attribute) + return self._untagged_response(typ, dat, 'ANNOTATION') + + def getquota(self, root): """Get the quota root's resource usage and limits. @@ -603,12 +622,12 @@ return self._untagged_response(typ, dat, name) - def select(self, mailbox='INBOX', readonly=None): + def select(self, mailbox='INBOX', readonly=False): """Select a mailbox. Flush all untagged responses. - (typ, [data]) = <instance>.select(mailbox='INBOX', readonly=None) + (typ, [data]) = <instance>.select(mailbox='INBOX', readonly=False) 'data' is count of messages in mailbox ('EXISTS' response). @@ -617,7 +636,7 @@ """ self.untagged_responses = {} # Flush old responses. self.is_readonly = readonly - if readonly is not None: + if readonly: name = 'EXAMINE' else: name = 'SELECT' @@ -643,6 +662,14 @@ return self._simple_command('SETACL', mailbox, who, what) + def setannotation(self, *args): + """(typ, [data]) = <instance>.setannotation(mailbox[, entry, attribute]+) + Set ANNOTATIONs.""" + + typ, dat = self._simple_command('SETANNOTATION', *args) + return self._untagged_response(typ, dat, 'ANNOTATION') + + def setquota(self, root, limits): """Set the quota root's resource limits.
Index: imghdr.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/imghdr.py,v retrieving revision 1.11 retrieving revision 1.11.26.1 diff -u -d -r1.11 -r1.11.26.1 --- imghdr.py 24 Jan 2001 06:27:27 -0000 1.11 +++ imghdr.py 16 Oct 2005 05:23:59 -0000 1.11.26.1 @@ -101,6 +101,13 @@ tests.append(test_jpeg) +def test_exif(h, f): + """JPEG data in Exif format""" + if h[6:10] == 'Exif': + return 'jpeg' + +tests.append(test_exif) + def test_bmp(h, f): if h[:2] == 'BM': return 'bmp' Index: inspect.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/inspect.py,v retrieving revision 1.36.2.2 retrieving revision 1.36.2.3 diff -u -d -r1.36.2.2 -r1.36.2.3 --- inspect.py 7 Jan 2005 06:58:05 -0000 1.36.2.2 +++ inspect.py 16 Oct 2005 05:23:59 -0000 1.36.2.3 @@ -29,6 +29,7 @@ __date__ = '1 Jan 2001' import sys, os, types, string, re, dis, imp, tokenize, linecache +from operator import attrgetter # ----------------------------------------------------------- type-checking def ismodule(object): @@ -346,7 +347,7 @@ def getsourcefile(object): """Return the Python source file an object was defined in, if it exists.""" filename = getfile(object) - if string.lower(filename[-4:]) in ['.pyc', '.pyo']: + if string.lower(filename[-4:]) in ('.pyc', '.pyo'): filename = filename[:-4] + '.py' for suffix, mode, kind in imp.get_suffixes(): if 'b' in mode and string.lower(filename[-len(suffix):]) == suffix: @@ -453,7 +454,7 @@ # Look for a comment block at the top of the file. 
start = 0 if lines and lines[0][:2] == '#!': start = 1 - while start < len(lines) and string.strip(lines[start]) in ['', '#']: + while start < len(lines) and string.strip(lines[start]) in ('', '#'): start = start + 1 if start < len(lines) and lines[start][:1] == '#': comments = [] @@ -484,42 +485,30 @@ comments[-1:] = [] return string.join(comments, '') -class ListReader: - """Provide a readline() method to return lines from a list of strings.""" - def __init__(self, lines): - self.lines = lines - self.index = 0 - - def readline(self): - i = self.index - if i < len(self.lines): - self.index = i + 1 - return self.lines[i] - else: return '' - class EndOfBlock(Exception): pass class BlockFinder: """Provide a tokeneater() method to detect the end of a code block.""" def __init__(self): self.indent = 0 + self.islambda = False self.started = False self.passline = False - self.last = 0 + self.last = 1 def tokeneater(self, type, token, (srow, scol), (erow, ecol), line): if not self.started: + # look for the first "def", "class" or "lambda" if token in ("def", "class", "lambda"): - lastcolon = line.rfind(":") - if lastcolon: - oneline = re.search(r"\w", line[lastcolon:]) - if oneline and line[-2:] != "\\\n": - raise EndOfBlock, srow + if token == "lambda": + self.islambda = True self.started = True - self.passline = True + self.passline = True # skip to the end of the line elif type == tokenize.NEWLINE: - self.passline = False + self.passline = False # stop skipping when a NEWLINE is seen self.last = srow + if self.islambda: # lambdas always end at the first NEWLINE + raise EndOfBlock elif self.passline: pass elif type == tokenize.INDENT: @@ -527,19 +516,24 @@ self.passline = True elif type == tokenize.DEDENT: self.indent = self.indent - 1 - if self.indent == 0: - raise EndOfBlock, self.last - elif type == tokenize.NAME and scol == 0: - raise EndOfBlock, self.last + # the end of matching indent/dedent pairs end a block + # (note that this only works for "def"/"class" 
blocks, + # not e.g. for "if: else:" or "try: finally:" blocks) + if self.indent <= 0: + raise EndOfBlock + elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL): + # any other token on the same indentation level end the previous + # block as well, except the pseudo-tokens COMMENT and NL. + raise EndOfBlock def getblock(lines): """Extract the block of code at the top of the given list of lines.""" + blockfinder = BlockFinder() try: - tokenize.tokenize(ListReader(lines).readline, BlockFinder().tokeneater) - except EndOfBlock, eob: - return lines[:eob.args[0]] - # Fooling the indent/dedent logic implies a one-line definition - return lines[:1] + tokenize.tokenize(iter(lines).next, blockfinder.tokeneater) + except (EndOfBlock, IndentationError): + pass + return lines[:blockfinder.last] def getsourcelines(object): """Return a list of source lines and starting line number for an object. @@ -567,7 +561,7 @@ def walktree(classes, children, parent): """Recursive helper function for getclasstree().""" results = [] - classes.sort(key=lambda c: (c.__module__, c.__name__)) + classes.sort(key=attrgetter('__module__', '__name__')) for c in classes: results.append((c, c.__bases__)) if c in children: @@ -621,7 +615,7 @@ # The following acrobatics are for anonymous (tuple) arguments. 
for i in range(nargs): - if args[i][:1] in ['', '.']: + if args[i][:1] in ('', '.'): stack, remain, count = [], [], [] while step < len(code): op = ord(code[step]) @@ -630,7 +624,7 @@ opname = dis.opname[op] value = ord(code[step]) + ord(code[step+1])*256 step = step + 2 - if opname in ['UNPACK_TUPLE', 'UNPACK_SEQUENCE']: + if opname in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): remain.append(value) count.append(value) elif opname == 'STORE_FAST': @@ -696,7 +690,7 @@ def strseq(object, convert, join=joinseq): """Recursively walk a sequence, stringifying each element.""" - if type(object) in [types.ListType, types.TupleType]: + if type(object) in (list, tuple): return join(map(lambda o, c=convert, j=join: strseq(o, c, j), object)) else: return convert(object) Index: locale.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/locale.py,v retrieving revision 1.20.2.2 retrieving revision 1.20.2.3 diff -u -d -r1.20.2.2 -r1.20.2.3 --- locale.py 7 Jan 2005 06:58:05 -0000 1.20.2.2 +++ locale.py 16 Oct 2005 05:23:59 -0000 1.20.2.3 @@ -306,7 +306,7 @@ else: return language + '.' + encoding -def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')): +def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')): """ Tries to determine the default locale settings and returns them as tuple (language code, encoding). 
@@ -351,6 +351,8 @@ for variable in envvars: localename = lookup(variable,None) if localename: + if variable == 'LANGUAGE': + localename = localename.split(':')[0] break else: localename = 'C' Index: macpath.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/macpath.py,v retrieving revision 1.39.2.2 retrieving revision 1.39.2.3 diff -u -d -r1.39.2.2 -r1.39.2.3 --- macpath.py 7 Jan 2005 06:58:05 -0000 1.39.2.2 +++ macpath.py 16 Oct 2005 05:23:59 -0000 1.39.2.3 @@ -5,7 +5,7 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", - "getatime","getctime", "islink","exists","isdir","isfile", + "getatime","getctime", "islink","exists","lexists","isdir","isfile", "walk","expanduser","expandvars","normpath","abspath", "curdir","pardir","sep","pathsep","defpath","altsep","extsep", "devnull","realpath","supports_unicode_filenames"] @@ -175,14 +175,14 @@ def commonprefix(m): "Given a list of pathnames, returns the longest common leading component" if not m: return '' - prefix = m[0] - for item in m: - for i in range(len(prefix)): - if prefix[:i+1] != item[:i+1]: - prefix = prefix[:i] - if i == 0: return '' - break - return prefix + s1 = min(m) + s2 = max(m) + n = min(len(s1), len(s2)) + for i in xrange(n): + if s1[i] != s2[i]: + return s1[:i] + return s1[:n] + def expandvars(path): """Dummy to retain interface-compatibility with other operating systems.""" Index: markupbase.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/markupbase.py,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- markupbase.py 7 Jan 2005 06:58:05 -0000 1.6.2.2 +++ markupbase.py 16 Oct 2005 05:23:59 -0000 1.6.2.3 @@ -1,4 +1,10 @@ -"""Shared support for scanning document type declarations in HTML and XHTML.""" +"""Shared support for scanning document type 
declarations in HTML and XHTML. + +This module is used as a foundation for the HTMLParser and sgmllib +modules (indirectly, for htmllib as well). It has no documented +public API and should not be used directly. + +""" import re Index: mhlib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/mhlib.py,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- mhlib.py 7 Jan 2005 06:58:05 -0000 1.34.2.2 +++ mhlib.py 16 Oct 2005 05:23:59 -0000 1.34.2.3 @@ -982,11 +982,11 @@ context = mh.getcontext() f = mh.openfolder(context) do('f.getcurrent()') - for seq in ['first', 'last', 'cur', '.', 'prev', 'next', + for seq in ('first', 'last', 'cur', '.', 'prev', 'next', 'first:3', 'last:3', 'cur:3', 'cur:-3', 'prev:3', 'next:3', '1:3', '1:-3', '100:3', '100:-3', '10000:3', '10000:-3', - 'all']: + 'all'): try: do('f.parsesequence(%r)' % (seq,)) except Error, msg: Index: mimetypes.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/mimetypes.py,v retrieving revision 1.22.2.2 retrieving revision 1.22.2.3 diff -u -d -r1.22.2.2 -r1.22.2.3 --- mimetypes.py 7 Jan 2005 06:58:05 -0000 1.22.2.2 +++ mimetypes.py 16 Oct 2005 05:23:59 -0000 1.22.2.3 @@ -443,12 +443,14 @@ '.vcf' : 'text/x-vcard', '.wav' : 'audio/x-wav', '.wiz' : 'application/msword', + '.wsdl' : 'application/xml', '.xbm' : 'image/x-xbitmap', '.xlb' : 'application/vnd.ms-excel', # Duplicates :( '.xls' : 'application/excel', '.xls' : 'application/vnd.ms-excel', '.xml' : 'text/xml', + '.xpdl' : 'application/xml', '.xpm' : 'image/x-xpixmap', '.xsl' : 'application/xml', '.xwd' : 'image/x-xwindowdump', Index: nntplib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/nntplib.py,v retrieving revision 1.30.2.2 retrieving revision 1.30.2.3 diff -u -d -r1.30.2.2 -r1.30.2.3 --- nntplib.py 7 Jan 2005 
06:58:06 -0000 1.30.2.2 +++ nntplib.py 16 Oct 2005 05:23:59 -0000 1.30.2.3 @@ -281,7 +281,7 @@ - time: string 'hhmmss' indicating the time Return: - resp: server response if successful - - list: list of article ids""" + - list: list of message ids""" cmd = 'NEWNEWS ' + group + ' ' + date + ' ' + time return self.longcmd(cmd, file) @@ -391,7 +391,7 @@ Returns: - resp: server response if successful - nr: the article number - - id: the article id""" + - id: the message id""" return self.statcmd('STAT ' + id) Index: ntpath.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/ntpath.py,v retrieving revision 1.49.2.2 retrieving revision 1.49.2.3 diff -u -d -r1.49.2.2 -r1.49.2.3 --- ntpath.py 7 Jan 2005 06:58:06 -0000 1.49.2.2 +++ ntpath.py 16 Oct 2005 05:23:59 -0000 1.49.2.3 @@ -11,10 +11,10 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", - "getatime","getctime", "islink","exists","isdir","isfile","ismount", - "walk","expanduser","expandvars","normpath","abspath","splitunc", - "curdir","pardir","sep","pathsep","defpath","altsep","extsep", - "devnull","realpath","supports_unicode_filenames"] + "getatime","getctime", "islink","exists","lexists","isdir","isfile", + "ismount","walk","expanduser","expandvars","normpath","abspath", + "splitunc","curdir","pardir","sep","pathsep","defpath","altsep", + "extsep","devnull","realpath","supports_unicode_filenames"] # strings representing various path-related bits and pieces curdir = '.' 
@@ -212,14 +212,13 @@ def commonprefix(m): "Given a list of pathnames, returns the longest common leading component" if not m: return '' - prefix = m[0] - for item in m: - for i in range(len(prefix)): - if prefix[:i+1] != item[:i+1]: - prefix = prefix[:i] - if i == 0: return '' - break - return prefix + s1 = min(m) + s2 = max(m) + n = min(len(s1), len(s2)) + for i in xrange(n): + if s1[i] != s2[i]: + return s1[:i] + return s1[:n] # Get size, mtime, atime of files. Index: optparse.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/optparse.py,v retrieving revision 1.4.4.2 retrieving revision 1.4.4.3 diff -u -d -r1.4.4.2 -r1.4.4.3 --- optparse.py 7 Jan 2005 06:58:06 -0000 1.4.4.2 +++ optparse.py 16 Oct 2005 05:23:59 -0000 1.4.4.3 @@ -67,7 +67,6 @@ """ import sys, os -import types import textwrap try: from gettext import gettext as _ @@ -590,7 +589,7 @@ if self.choices is None: raise OptionError( "must supply a list of choices for type 'choice'", self) - elif type(self.choices) not in (types.TupleType, types.ListType): + elif type(self.choices) not in (tuple, list): raise OptionError( "choices must be a list of strings ('%s' supplied)" % str(type(self.choices)).split("'")[1], self) @@ -634,12 +633,12 @@ raise OptionError( "callback not callable: %r" % self.callback, self) if (self.callback_args is not None and - type(self.callback_args) is not types.TupleType): + type(self.callback_args) is not tuple): raise OptionError( "callback_args, if supplied, must be a tuple: not %r" % self.callback_args, self) if (self.callback_kwargs is not None and - type(self.callback_kwargs) is not types.DictType): + type(self.callback_kwargs) is not dict): raise OptionError( "callback_kwargs, if supplied, must be a dict: not %r" % self.callback_kwargs, self) @@ -927,7 +926,7 @@ """add_option(Option) add_option(opt_str, ..., kwarg=val, ...) 
""" - if type(args[0]) is types.StringType: + if type(args[0]) is str: option = self.option_class(*args, **kwargs) elif len(args) == 1 and not kwargs: option = args[0] @@ -1213,7 +1212,7 @@ def add_option_group(self, *args, **kwargs): # XXX lots of overlap with OptionContainer.add_option() - if type(args[0]) is types.StringType: + if type(args[0]) is str: group = OptionGroup(self, *args, **kwargs) elif len(args) == 1 and not kwargs: group = args[0] Index: os.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/os.py,v retrieving revision 1.58.2.3 retrieving revision 1.58.2.4 diff -u -d -r1.58.2.3 -r1.58.2.4 --- os.py 7 Jan 2005 06:58:07 -0000 1.58.2.3 +++ os.py 16 Oct 2005 05:23:59 -0000 1.58.2.4 @@ -29,7 +29,8 @@ # Note: more names are added to __all__ later. __all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep", - "defpath", "name", "path", "devnull"] + "defpath", "name", "path", "devnull", + "SEEK_SET", "SEEK_CUR", "SEEK_END"] def _get_exports_list(module): try: @@ -135,6 +136,12 @@ del _names +# Python uses fixed values for the SEEK_ constants; they are mapped +# to native constants if necessary in posixmodule.c +SEEK_SET = 0 +SEEK_CUR = 1 +SEEK_END = 2 + #' # Super directory utilities. @@ -435,6 +442,22 @@ return key.upper() in self.data def get(self, key, failobj=None): return self.data.get(key.upper(), failobj) + def update(self, dict=None, **kwargs): + if dict: + try: + keys = dict.keys() + except AttributeError: + # List of (key, value) + for k, v in dict: + self[k] = v + else: + # got keys + # cannot use items(), since mappings + # may not have them. 
+ for k in keys: + self[k] = dict[k] + if kwargs: + self.update(kwargs) def copy(self): return dict(self) @@ -446,6 +469,22 @@ def __setitem__(self, key, item): putenv(key, item) self.data[key] = item + def update(self, dict=None, **kwargs): + if dict: + try: + keys = dict.keys() + except AttributeError: + # List of (key, value) + for k, v in dict: + self[k] = v + else: + # got keys + # cannot use items(), since mappings + # may not have them. + for k in keys: + self[k] = dict[k] + if kwargs: + self.update(kwargs) try: unsetenv except NameError: @@ -676,22 +715,18 @@ pass if not _exists("urandom"): - _urandomfd = None def urandom(n): """urandom(n) -> str Return a string of n random bytes suitable for cryptographic use. """ - global _urandomfd - if _urandomfd is None: - try: - _urandomfd = open("/dev/urandom", O_RDONLY) - except: - _urandomfd = NotImplementedError - if _urandomfd is NotImplementedError: + try: + _urandomfd = open("/dev/urandom", O_RDONLY) + except: raise NotImplementedError("/dev/urandom (or equivalent) not found") bytes = "" while len(bytes) < n: bytes += read(_urandomfd, n - len(bytes)) + close(_urandomfd) return bytes Index: os2emxpath.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/os2emxpath.py,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- os2emxpath.py 7 Jan 2005 06:58:07 -0000 1.6.2.2 +++ os2emxpath.py 16 Oct 2005 05:23:59 -0000 1.6.2.3 @@ -10,10 +10,10 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", - "getatime","getctime", "islink","exists","isdir","isfile","ismount", - "walk","expanduser","expandvars","normpath","abspath","splitunc", - "curdir","pardir","sep","pathsep","defpath","altsep","extsep", - "devnull","realpath","supports_unicode_filenames"] + "getatime","getctime", "islink","exists","lexists","isdir","isfile", + 
"ismount","walk","expanduser","expandvars","normpath","abspath", + "splitunc","curdir","pardir","sep","pathsep","defpath","altsep", + "extsep","devnull","realpath","supports_unicode_filenames"] # strings representing various path-related bits and pieces curdir = '.' @@ -173,14 +173,13 @@ def commonprefix(m): "Given a list of pathnames, returns the longest common leading component" if not m: return '' - prefix = m[0] - for item in m: - for i in range(len(prefix)): - if prefix[:i+1] != item[:i+1]: - prefix = prefix[:i] - if i == 0: return '' - break - return prefix + s1 = min(m) + s2 = max(m) + n = min(len(s1), len(s2)) + for i in xrange(n): + if s1[i] != s2[i]: + return s1[:i] + return s1[:n] # Get size, mtime, atime of files. Index: pdb.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/pdb.py,v retrieving revision 1.53.2.2 retrieving revision 1.53.2.3 diff -u -d -r1.53.2.2 -r1.53.2.3 --- pdb.py 7 Jan 2005 06:58:07 -0000 1.53.2.2 +++ pdb.py 16 Oct 2005 05:23:59 -0000 1.53.2.3 @@ -450,11 +450,14 @@ return numberlist = arg.split() for i in numberlist: + if not (0 <= i < len(bdb.Breakpoint.bpbynumber)): + print 'No breakpoint numbered', i + continue err = self.clear_bpbynumber(i) if err: print '***', err else: - print 'Deleted breakpoint %s ' % (i,) + print 'Deleted breakpoint', i do_cl = do_clear # 'c' is already an abbreviation for 'continue' def do_where(self, arg): Index: pickletools.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/pickletools.py,v retrieving revision 1.26.6.2 retrieving revision 1.26.6.3 diff -u -d -r1.26.6.2 -r1.26.6.3 --- pickletools.py 7 Jan 2005 06:58:07 -0000 1.26.6.2 +++ pickletools.py 16 Oct 2005 05:23:59 -0000 1.26.6.3 @@ -1996,6 +1996,11 @@ if stack: raise ValueError("stack not empty after STOP: %r" % stack) +# For use in the doctest, simply as an example of a class to pickle. 
+class _Example: + def __init__(self, value): + self.value = value + _dis_test = r""" >>> import pickle >>> x = [1, 2, (3, 4), {'abc': u"def"}] @@ -2060,27 +2065,27 @@ 18: . STOP highest protocol among opcodes = 0 ->>> x = [pickle.PicklingError()] * 2 +>>> from pickletools import _Example +>>> x = [_Example(42)] * 2 >>> dis(pickle.dumps(x, 0)) 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 5: ( MARK - 6: i INST 'pickle PicklingError' (MARK at 5) + 6: i INST 'pickletools _Example' (MARK at 5) 28: p PUT 1 31: ( MARK 32: d DICT (MARK at 31) 33: p PUT 2 - 36: S STRING 'args' - 44: p PUT 3 - 47: ( MARK - 48: t TUPLE (MARK at 47) - 49: s SETITEM - 50: b BUILD - 51: a APPEND - 52: g GET 1 - 55: a APPEND - 56: . STOP + 36: S STRING 'value' + 45: p PUT 3 + 48: I INT 42 + 52: s SETITEM + 53: b BUILD + 54: a APPEND + 55: g GET 1 + 58: a APPEND + 59: . STOP highest protocol among opcodes = 0 >>> dis(pickle.dumps(x, 1)) @@ -2088,20 +2093,20 @@ 1: q BINPUT 0 3: ( MARK 4: ( MARK - 5: c GLOBAL 'pickle PicklingError' + 5: c GLOBAL 'pickletools _Example' 27: q BINPUT 1 29: o OBJ (MARK at 4) 30: q BINPUT 2 32: } EMPTY_DICT 33: q BINPUT 3 - 35: U SHORT_BINSTRING 'args' - 41: q BINPUT 4 - 43: ) EMPTY_TUPLE - 44: s SETITEM - 45: b BUILD - 46: h BINGET 2 - 48: e APPENDS (MARK at 3) - 49: . STOP + 35: U SHORT_BINSTRING 'value' + 42: q BINPUT 4 + 44: K BININT1 42 + 46: s SETITEM + 47: b BUILD + 48: h BINGET 2 + 50: e APPENDS (MARK at 3) + 51: . STOP highest protocol among opcodes = 1 Try "the canonical" recursive-object test. 
Index: popen2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/popen2.py,v retrieving revision 1.25.2.1 retrieving revision 1.25.2.2 diff -u -d -r1.25.2.1 -r1.25.2.2 --- popen2.py 7 Jan 2005 06:58:07 -0000 1.25.2.1 +++ popen2.py 16 Oct 2005 05:23:59 -0000 1.25.2.2 @@ -213,7 +213,7 @@ raise ValueError("wrote %r read %r" % (teststr, got)) got = e.read() if got: - raise ValueError("unexected %r on stderr" % (got,)) + raise ValueError("unexpected %r on stderr" % (got,)) for inst in _active[:]: inst.wait() if _active: Index: poplib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/poplib.py,v retrieving revision 1.21.2.1 retrieving revision 1.21.2.2 diff -u -d -r1.21.2.1 -r1.21.2.2 --- poplib.py 7 Jan 2005 06:58:07 -0000 1.21.2.1 +++ poplib.py 16 Oct 2005 05:23:59 -0000 1.21.2.2 @@ -219,7 +219,7 @@ """Request listing, return result. Result without a message number argument is in form - ['response', ['mesg_num octets', ...]]. + ['response', ['mesg_num octets', ...], octets]. Result when a message number argument is given is a single response: the "scan listing" for that message. 
Index: posixfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/posixfile.py,v retrieving revision 1.24.10.1 retrieving revision 1.24.10.2 diff -u -d -r1.24.10.1 -r1.24.10.2 --- posixfile.py 7 Jan 2005 06:58:07 -0000 1.24.10.1 +++ posixfile.py 16 Oct 2005 05:23:59 -0000 1.24.10.2 @@ -179,10 +179,11 @@ if sys.platform in ('netbsd1', 'openbsd2', 'freebsd2', 'freebsd3', 'freebsd4', 'freebsd5', - 'freebsd6', 'bsdos2', 'bsdos3', 'bsdos4'): + 'freebsd6', 'freebsd7', + 'bsdos2', 'bsdos3', 'bsdos4'): flock = struct.pack('lxxxxlxxxxlhh', \ l_start, l_len, os.getpid(), l_type, l_whence) - elif sys.platform in ['aix3', 'aix4']: + elif sys.platform in ('aix3', 'aix4'): flock = struct.pack('hhlllii', \ l_type, l_whence, l_start, l_len, 0, 0, 0) else: @@ -198,7 +199,7 @@ 'bsdos2', 'bsdos3', 'bsdos4'): l_start, l_len, l_pid, l_type, l_whence = \ struct.unpack('lxxxxlxxxxlhh', flock) - elif sys.platform in ['aix3', 'aix4']: + elif sys.platform in ('aix3', 'aix4'): l_type, l_whence, l_start, l_len, l_sysid, l_pid, l_vfs = \ struct.unpack('hhlllii', flock) elif sys.platform == "linux2": Index: posixpath.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/posixpath.py,v retrieving revision 1.51.2.2 retrieving revision 1.51.2.3 diff -u -d -r1.51.2.2 -r1.51.2.3 --- posixpath.py 7 Jan 2005 06:58:07 -0000 1.51.2.2 +++ posixpath.py 16 Oct 2005 05:23:59 -0000 1.51.2.3 @@ -15,8 +15,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", - "getatime","getctime","islink","exists","isdir","isfile","ismount", - "walk","expanduser","expandvars","normpath","abspath", + "getatime","getctime","islink","exists","lexists","isdir","isfile", + "ismount","walk","expanduser","expandvars","normpath","abspath", "samefile","sameopenfile","samestat", 
"curdir","pardir","sep","pathsep","defpath","altsep","extsep", "devnull","realpath","supports_unicode_filenames"] @@ -414,7 +414,7 @@ if isabs(filename): bits = ['/'] + filename.split('/')[1:] else: - bits = filename.split('/') + bits = [''] + filename.split('/') for i in range(2, len(bits)+1): component = join(*bits[0:i]) Index: profile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/profile.py,v retrieving revision 1.47.2.2 retrieving revision 1.47.2.3 diff -u -d -r1.47.2.2 -r1.47.2.3 --- profile.py 7 Jan 2005 06:58:08 -0000 1.47.2.2 +++ profile.py 16 Oct 2005 05:23:59 -0000 1.47.2.3 @@ -4,8 +4,6 @@ # # Based on prior profile module by Sjoerd Mullender... # which was hacked somewhat by: Guido van Rossum -# -# See profile.doc for more information """Class for profiling Python code.""" @@ -94,18 +92,10 @@ else: return prof.print_stats() -# print help +# Backwards compatibility. def help(): - for dirname in sys.path: - fullname = os.path.join(dirname, 'profile.doc') - if os.path.exists(fullname): - sts = os.system('${PAGER-more} ' + fullname) - if sts: print '*** Pager exit status:', sts - break - else: - print 'Sorry, can\'t find the help file "profile.doc"', - print 'along the Python search path.' - + print "Documentation for the profile module can be found " + print "in the Python Library Reference, section 'The Python Profiler'." if os.name == "mac": import MacOS @@ -117,6 +107,20 @@ t = timer() return t[0] + t[1] +# Using getrusage(3) is better than clock(3) if available: +# on some systems (e.g. FreeBSD), getrusage has a higher resolution +# Furthermore, on a POSIX system, returns microseconds, which +# wrap around after 36min. +_has_res = 0 +try: + import resource + resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF) + def _get_time_resource(timer=resgetrusage): + t = timer() + return t[0] + t[1] + _has_res = 1 +except ImportError: + pass class Profile: """Profiler class. 
@@ -169,8 +173,12 @@ bias = self.bias self.bias = bias # Materialize in local dict for lookup speed. - if timer is None: - if os.name == 'mac': + if not timer: + if _has_res: + self.timer = resgetrusage + self.dispatcher = self.trace_dispatch + self.get_time = _get_time_resource + elif os.name == 'mac': self.timer = MacOS.GetTicks self.dispatcher = self.trace_dispatch_mac self.get_time = _get_time_mac @@ -360,7 +368,7 @@ "exception": trace_dispatch_exception, "return": trace_dispatch_return, "c_call": trace_dispatch_c_call, - "c_exception": trace_dispatch_exception, + "c_exception": trace_dispatch_return, # the C function returned "c_return": trace_dispatch_return, } @@ -583,26 +591,19 @@ def Stats(*args): print 'Report generating functions are in the "pstats" module\a' - -# When invoked as main program, invoke the profiler on a script -if __name__ == '__main__': +def main(): usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." - if not sys.argv[1:]: - print "Usage: ", usage - sys.exit(2) - - class ProfileParser(OptionParser): - def __init__(self, usage): - OptionParser.__init__(self) - self.usage = usage - - parser = ProfileParser(usage) + parser = OptionParser(usage=usage) parser.allow_interspersed_args = False parser.add_option('-o', '--outfile', dest="outfile", help="Save stats to ", default=None) parser.add_option('-s', '--sort', dest="sort", help="Sort order when printing to stdout, based on pstats.Stats class", default=-1) + if not sys.argv[1:]: + parser.print_usage() + sys.exit(2) + (options, args) = parser.parse_args() sys.argv[:] = args @@ -610,4 +611,9 @@ sys.path.insert(0, os.path.dirname(sys.argv[0])) run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort) else: - print "Usage: ", usage + parser.print_usage() + return parser + +# When invoked as main program, invoke the profiler on a script +if __name__ == '__main__': + main() Index: py_compile.py =================================================================== RCS 
file: /cvsroot/python/python/dist/src/Lib/py_compile.py,v retrieving revision 1.21.2.1 retrieving revision 1.21.2.2 diff -u -d -r1.21.2.1 -r1.21.2.2 --- py_compile.py 28 Apr 2003 17:32:10 -0000 1.21.2.1 +++ py_compile.py 16 Oct 2005 05:23:59 -0000 1.21.2.2 @@ -128,7 +128,7 @@ if doraise: raise py_exc else: - sys.stderr.write(py_exc.msg) + sys.stderr.write(py_exc.msg + '\n') return if cfile is None: cfile = file + (__debug__ and 'c' or 'o') Index: pydoc.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/pydoc.py,v retrieving revision 1.65.2.2 retrieving revision 1.65.2.3 diff -u -d -r1.65.2.2 -r1.65.2.3 --- pydoc.py 7 Jan 2005 06:58:08 -0000 1.65.2.2 +++ pydoc.py 16 Oct 2005 05:23:59 -0000 1.65.2.3 @@ -36,6 +36,7 @@ __author__ = "Ka-Ping Yee " __date__ = "26 February 2001" + __version__ = "$Revision$" __credits__ = """Guido van Rossum, for an excellent programming language. Tommy Burnette, the original creator of manpy. @@ -153,8 +154,8 @@ def visiblename(name, all=None): """Decide whether to show documentation on a variable.""" # Certain special names are redundant. - if name in ['__builtins__', '__doc__', '__file__', '__path__', - '__module__', '__name__']: return 0 + if name in ('__builtins__', '__doc__', '__file__', '__path__', + '__module__', '__name__', '__slots__'): return 0 # Private names are hidden, but special names are displayed. 
if name.startswith('__') and name.endswith('__'): return 1 if all is not None: @@ -163,12 +164,20 @@ else: return not name.startswith('_') +def classify_class_attrs(object): + """Wrap inspect.classify_class_attrs, with fixup for data descriptors.""" + def fixup((name, kind, cls, value)): + if inspect.isdatadescriptor(value): + kind = 'data descriptor' + return name, kind, cls, value + return map(fixup, inspect.classify_class_attrs(object)) + # ----------------------------------------------------- module manipulation def ispackage(path): """Guess whether a path refers to a package directory.""" if os.path.isdir(path): - for ext in ['.py', '.pyc', '.pyo']: + for ext in ('.py', '.pyc', '.pyo'): if os.path.isfile(os.path.join(path, '__init__' + ext)): return True return False @@ -718,13 +727,13 @@ push('\n') return attrs - def spillproperties(msg, attrs, predicate): + def spilldescriptors(msg, attrs, predicate): ok, attrs = _split_list(attrs, predicate) if ok: hr.maybe() push(msg) for name, kind, homecls, value in ok: - push(self._docproperty(name, value, mod)) + push(self._docdescriptor(name, value, mod)) return attrs def spilldata(msg, attrs, predicate): @@ -749,7 +758,7 @@ return attrs attrs = filter(lambda (name, kind, cls, value): visiblename(name), - inspect.classify_class_attrs(object)) + classify_class_attrs(object)) mdict = {} for key, kind, homecls, value in attrs: mdict[key] = anchor = '#' + name + '-' + key @@ -788,8 +797,8 @@ lambda t: t[1] == 'class method') attrs = spill('Static methods %s' % tag, attrs, lambda t: t[1] == 'static method') - attrs = spillproperties('Properties %s' % tag, attrs, - lambda t: t[1] == 'property') + attrs = spilldescriptors('Data descriptors %s' % tag, attrs, + lambda t: t[1] == 'data descriptor') attrs = spilldata('Data and other attributes %s' % tag, attrs, lambda t: t[1] == 'data') assert attrs == [] @@ -871,29 +880,22 @@ doc = doc and '
%s
' % doc return '
%s
%s
\n' % (decl, doc) - def _docproperty(self, name, value, mod): + def _docdescriptor(self, name, value, mod): results = [] push = results.append if name: push('
%s
\n' % name) if value.__doc__ is not None: - doc = self.markup(value.__doc__, self.preformat) + doc = self.markup(getdoc(value), self.preformat) push('
%s
\n' % doc) - for attr, tag in [('fget', 'get'), - ('fset', 'set'), - ('fdel', 'delete')]: - func = getattr(value, attr) - if func is not None: - base = self.document(func, tag, mod) - push('
%s
\n' % base) push('
\n') return ''.join(results) def docproperty(self, object, name=None, mod=None, cl=None): """Produce html documentation for a property.""" - return self._docproperty(name, object, mod) + return self._docdescriptor(name, object, mod) def docother(self, object, name=None, mod=None, *ignored): """Produce HTML documentation for a data object.""" @@ -1078,7 +1080,7 @@ if data: contents = [] for key, value in data: - contents.append(self.docother(value, key, name, 70)) + contents.append(self.docother(value, key, name, maxlen=70)) result = result + self.section('DATA', join(contents, '\n')) if hasattr(object, '__version__'): @@ -1143,13 +1145,13 @@ name, mod, object)) return attrs - def spillproperties(msg, attrs, predicate): + def spilldescriptors(msg, attrs, predicate): ok, attrs = _split_list(attrs, predicate) if ok: hr.maybe() push(msg) for name, kind, homecls, value in ok: - push(self._docproperty(name, value, mod)) + push(self._docdescriptor(name, value, mod)) return attrs def spilldata(msg, attrs, predicate): @@ -1159,15 +1161,15 @@ push(msg) for name, kind, homecls, value in ok: if callable(value) or inspect.isdatadescriptor(value): - doc = getattr(value, "__doc__", None) + doc = getdoc(value) else: doc = None push(self.docother(getattr(object, name), - name, mod, 70, doc) + '\n') + name, mod, maxlen=70, doc=doc) + '\n') return attrs attrs = filter(lambda (name, kind, cls, value): visiblename(name), - inspect.classify_class_attrs(object)) + classify_class_attrs(object)) while attrs: if mro: thisclass = mro.popleft() @@ -1195,8 +1197,8 @@ lambda t: t[1] == 'class method') attrs = spill("Static methods %s:\n" % tag, attrs, lambda t: t[1] == 'static method') - attrs = spillproperties("Properties %s:\n" % tag, attrs, - lambda t: t[1] == 'property') + attrs = spilldescriptors("Data descriptors %s:\n" % tag, attrs, + lambda t: t[1] == 'data descriptor') attrs = spilldata("Data and other attributes %s:\n" % tag, attrs, lambda t: t[1] == 'data') assert attrs == [] @@ 
-1254,35 +1256,24 @@ doc = getdoc(object) or '' return decl + '\n' + (doc and rstrip(self.indent(doc)) + '\n') - def _docproperty(self, name, value, mod): + def _docdescriptor(self, name, value, mod): results = [] push = results.append if name: - push(name) - need_blank_after_doc = 0 + push(self.bold(name)) + push('\n') doc = getdoc(value) or '' if doc: push(self.indent(doc)) - need_blank_after_doc = 1 - for attr, tag in [('fget', ''), - ('fset', ''), - ('fdel', '')]: - func = getattr(value, attr) - if func is not None: - if need_blank_after_doc: - push('') - need_blank_after_doc = 0 - base = self.document(func, tag, mod) - push(self.indent(base)) - - return '\n'.join(results) + push('\n') + return ''.join(results) def docproperty(self, object, name=None, mod=None, cl=None): """Produce text documentation for a property.""" - return self._docproperty(name, object, mod) + return self._docdescriptor(name, object, mod) - def docother(self, object, name=None, mod=None, maxlen=None, doc=None): + def docother(self, object, name=None, mod=None, parent=None, maxlen=None, doc=None): """Produce text documentation for a data object.""" repr = self.repr(object) if maxlen: @@ -1308,12 +1299,12 @@ return plainpager if not sys.stdin.isatty() or not sys.stdout.isatty(): return plainpager - if os.environ.get('TERM') in ['dumb', 'emacs']: + if os.environ.get('TERM') in ('dumb', 'emacs'): return plainpager if 'PAGER' in os.environ: if sys.platform == 'win32': # pipes completely broken in Windows return lambda text: tempfilepager(plain(text), os.environ['PAGER']) - elif os.environ.get('TERM') in ['dumb', 'emacs']: + elif os.environ.get('TERM') in ('dumb', 'emacs'): return lambda text: pipepager(plain(text), os.environ['PAGER']) else: return lambda text: pipepager(text, os.environ['PAGER']) @@ -1379,14 +1370,14 @@ sys.stdout.flush() c = getchar() - if c in ['q', 'Q']: + if c in ('q', 'Q'): sys.stdout.write('\r \r') break - elif c in ['\r', '\n']: + elif c in ('\r', '\n'): 
sys.stdout.write('\r \r' + lines[r] + '\n') r = r + 1 continue - if c in ['b', 'B', '\x1b']: + if c in ('b', 'B', '\x1b'): r = r - inc - inc if r < 0: r = 0 sys.stdout.write('\n' + join(lines[r:r+inc], '\n') + '\n') @@ -1464,6 +1455,14 @@ desc += ' in ' + name[:name.rfind('.')] elif module and module is not object: desc += ' in module ' + module.__name__ + if not (inspect.ismodule(object) or + inspect.isclass(object) or + inspect.isroutine(object) or + isinstance(object, property)): + # If the passed object is a piece of data or an instance, + # document its available methods instead of its value. + object = type(object) + desc += ' object' pager(title % desc + '\n\n' + text.document(object, name)) except (ImportError, ErrorDuringImport), value: print value @@ -1656,7 +1655,7 @@ except (KeyboardInterrupt, EOFError): break request = strip(replace(request, '"', '', "'", '')) - if lower(request) in ['q', 'quit']: break + if lower(request) in ('q', 'quit'): break self.help(request) def getline(self, prompt): Index: random.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/random.py,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- random.py 7 Jan 2005 06:58:08 -0000 1.34.2.2 +++ random.py 16 Oct 2005 05:23:59 -0000 1.34.2.3 @@ -41,7 +41,7 @@ from warnings import warn as _warn from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType -from math import log as _log, exp as _exp, pi as _pi, e as _e +from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin from os import urandom as _urandom from binascii import hexlify as _hexlify @@ -286,15 +286,14 @@ """ # Sampling without replacement entails tracking either potential - # selections (the pool) in a list or previous selections in a - # dictionary. 
+ # selections (the pool) in a list or previous selections in a set. # When the number of selections is small compared to the # population, then tracking selections is efficient, requiring - # only a small dictionary and an occasional reselection. For + # only a small set and an occasional reselection. For # a larger number of selections, the pool tracking method is # preferred since the list takes less space than the - # dictionary and it doesn't suffer from frequent reselections. + # set and it doesn't suffer from frequent reselections. n = len(population) if not 0 <= k <= n: @@ -302,7 +301,10 @@ random = self.random _int = int result = [None] * k - if n < 6 * k: # if n len list takes less space than a k len dict + setsize = 21 # size of a small set minus size of an empty list + if k > 5: + setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets + if n <= setsize: # is an n-length list smaller than a k-length set pool = list(population) for i in xrange(k): # invariant: non-selected at [0,n-i) j = _int(random() * (n-i)) @@ -311,14 +313,16 @@ else: try: n > 0 and (population[0], population[n//2], population[n-1]) - except (TypeError, KeyError): # handle sets and dictionaries + except (TypeError, KeyError): # handle non-sequence iterables population = tuple(population) - selected = {} + selected = set() + selected_add = selected.add for i in xrange(k): j = _int(random() * n) while j in selected: j = _int(random() * n) - result[i] = selected[j] = population[j] + selected_add(j) + result[i] = population[j] return result ## -------------------- real-valued distributions ------------------- @@ -345,7 +349,7 @@ # Math Software, 3, (1977), pp257-260. 
random = self.random - while True: + while 1: u1 = random() u2 = 1.0 - random() z = NV_MAGICCONST*(u1-0.5)/u2 @@ -415,7 +419,7 @@ b = (a - _sqrt(2.0 * a))/(2.0 * kappa) r = (1.0 + b * b)/(2.0 * b) - while True: + while 1: u1 = random() z = _cos(_pi * u1) @@ -424,7 +428,7 @@ u2 = random() - if not (u2 >= c * (2.0 - c) and u2 > c * _exp(1.0 - c)): + if u2 < c * (2.0 - c) or u2 <= c * _exp(1.0 - c): break u3 = random() @@ -462,7 +466,7 @@ bbb = alpha - LOG4 ccc = alpha + ainv - while True: + while 1: u1 = random() if not 1e-7 < u1 < .9999999: continue @@ -485,18 +489,19 @@ # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle - while True: + while 1: u = random() b = (_e + alpha)/_e p = b*u if p <= 1.0: - x = pow(p, 1.0/alpha) + x = p ** (1.0/alpha) else: - # p > 1 x = -_log((b-p)/alpha) u1 = random() - if not (((p <= 1.0) and (u1 > _exp(-x))) or - ((p > 1) and (u1 > pow(x, alpha - 1.0)))): + if p > 1.0: + if u1 <= x ** (alpha - 1.0): + break + elif u1 <= _exp(-x): break return x * beta Index: reconvert.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/reconvert.py,v retrieving revision 1.6.16.1 retrieving revision 1.6.16.2 diff -u -d -r1.6.16.1 -r1.6.16.2 --- reconvert.py 7 Jan 2005 06:58:08 -0000 1.6.16.1 +++ reconvert.py 16 Oct 2005 05:23:59 -0000 1.6.16.2 @@ -166,7 +166,7 @@ if q in s and altq not in s: q = altq else: - assert quote in ('"', "'") + assert quote in ('"', "'", '"""', "'''") q = quote res = q for c in s: Index: rfc822.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/rfc822.py,v retrieving revision 1.72.2.2 retrieving revision 1.72.2.3 diff -u -d -r1.72.2.2 -r1.72.2.3 --- rfc822.py 7 Jan 2005 06:58:09 -0000 1.72.2.2 +++ rfc822.py 16 Oct 2005 05:23:59 -0000 1.72.2.3 @@ -90,8 +90,6 @@ fp.tell() except (AttributeError, IOError): seekable = 0 - else: - seekable = 1 self.fp = fp self.seekable = seekable 
self.startofheaders = None @@ -134,7 +132,7 @@ """ self.dict = {} self.unixfrom = '' - self.headers = list = [] + self.headers = lst = [] self.status = '' headerseen = "" firstline = 1 @@ -161,7 +159,7 @@ firstline = 0 if headerseen and line[0] in ' \t': # It's a continuation line. - list.append(line) + lst.append(line) x = (self.dict[headerseen] + "\n " + line.strip()) self.dict[headerseen] = x.strip() continue @@ -174,7 +172,7 @@ headerseen = self.isheader(line) if headerseen: # It's a legal header line, save it. - list.append(line) + lst.append(line) self.dict[headerseen] = line[len(headerseen)+1:].strip() continue else: @@ -202,8 +200,7 @@ i = line.find(':') if i > 0: return line[:i].lower() - else: - return None + return None def islast(self, line): """Determine whether a line is a legal end of RFC 2822 headers. @@ -235,7 +232,7 @@ """ name = name.lower() + ':' n = len(name) - list = [] + lst = [] hit = 0 for line in self.headers: if line[:n].lower() == name: @@ -243,8 +240,8 @@ elif not line[:1].isspace(): hit = 0 if hit: - list.append(line) - return list + lst.append(line) + return lst def getfirstmatchingheader(self, name): """Get the first header line matching name. @@ -254,7 +251,7 @@ """ name = name.lower() + ':' n = len(name) - list = [] + lst = [] hit = 0 for line in self.headers: if hit: @@ -263,8 +260,8 @@ elif line[:n].lower() == name: hit = 1 if hit: - list.append(line) - return list + lst.append(line) + return lst def getrawheader(self, name): """A higher-level interface to getfirstmatchingheader(). @@ -275,11 +272,11 @@ occur. """ - list = self.getfirstmatchingheader(name) - if not list: + lst = self.getfirstmatchingheader(name) + if not lst: return None - list[0] = list[0][len(name) + 1:] - return ''.join(list) + lst[0] = lst[0][len(name) + 1:] + return ''.join(lst) def getheader(self, name, default=None): """Get the header value for a name. @@ -288,10 +285,7 @@ header value for a given header name, or None if it doesn't exist. 
This uses the dictionary version which finds the *last* such header. """ - try: - return self.dict[name.lower()] - except KeyError: - return default + return self.dict.get(name.lower(), default) get = getheader def getheaders(self, name): @@ -399,8 +393,7 @@ del self[name] # Won't fail if it doesn't exist self.dict[name.lower()] = value text = name + ": " + value - lines = text.split("\n") - for line in lines: + for line in text.split("\n"): self.headers.append(line + "\n") def __delitem__(self, name): @@ -411,7 +404,7 @@ del self.dict[name] name = name + ':' n = len(name) - list = [] + lst = [] hit = 0 for i in range(len(self.headers)): line = self.headers[i] @@ -420,8 +413,8 @@ elif not line[:1].isspace(): hit = 0 if hit: - list.append(i) - for i in reversed(list): + lst.append(i) + for i in reversed(lst): del self.headers[i] def setdefault(self, name, default=""): @@ -430,8 +423,7 @@ return self.dict[lowername] else: text = name + ": " + default - lines = text.split("\n") - for line in lines: + for line in text.split("\n"): self.headers.append(line + "\n") self.dict[lowername] = default return default @@ -473,29 +465,28 @@ # XXX The inverses of the parse functions may also be useful. 
-def unquote(str): +def unquote(s): """Remove quotes from a string.""" - if len(str) > 1: - if str.startswith('"') and str.endswith('"'): - return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') - if str.startswith('<') and str.endswith('>'): - return str[1:-1] - return str + if len(s) > 1: + if s.startswith('"') and s.endswith('"'): + return s[1:-1].replace('\\\\', '\\').replace('\\"', '"') + if s.startswith('<') and s.endswith('>'): + return s[1:-1] + return s -def quote(str): +def quote(s): """Add quotes around a string.""" - return str.replace('\\', '\\\\').replace('"', '\\"') + return s.replace('\\', '\\\\').replace('"', '\\"') def parseaddr(address): """Parse an address into a (realname, mailaddr) tuple.""" a = AddressList(address) - list = a.addresslist - if not list: + lst = a.addresslist + if not lst: return (None, None) - else: - return list[0] + return lst[0] class AddrlistClass: @@ -543,12 +534,10 @@ Returns a list containing all of the addresses. """ result = [] - while 1: + ad = self.getaddress() + while ad: + result += ad ad = self.getaddress() - if ad: - result += ad - else: - break return result def getaddress(self): @@ -581,11 +570,11 @@ returnlist = [] fieldlen = len(self.field) - self.pos = self.pos + 1 + self.pos += 1 while self.pos < len(self.field): self.gotonext() if self.pos < fieldlen and self.field[self.pos] == ';': - self.pos = self.pos + 1 + self.pos += 1 break returnlist = returnlist + self.getaddress() @@ -602,11 +591,11 @@ if plist: returnlist = [(' '.join(self.commentlist), plist[0])] elif self.field[self.pos] in self.specials: - self.pos = self.pos + 1 + self.pos += 1 self.gotonext() if self.pos < len(self.field) and self.field[self.pos] == ',': - self.pos = self.pos + 1 + self.pos += 1 return returnlist def getrouteaddr(self): @@ -618,7 +607,7 @@ return expectroute = 0 - self.pos = self.pos + 1 + self.pos += 1 self.gotonext() adlist = "" while self.pos < len(self.field): @@ -626,16 +615,16 @@ self.getdomain() expectroute = 0 
elif self.field[self.pos] == '>': - self.pos = self.pos + 1 + self.pos += 1 break elif self.field[self.pos] == '@': - self.pos = self.pos + 1 + self.pos += 1 expectroute = 1 elif self.field[self.pos] == ':': - self.pos = self.pos + 1 + self.pos += 1 else: adlist = self.getaddrspec() - self.pos = self.pos + 1 + self.pos += 1 break self.gotonext() @@ -649,7 +638,7 @@ while self.pos < len(self.field): if self.field[self.pos] == '.': aslist.append('.') - self.pos = self.pos + 1 + self.pos += 1 elif self.field[self.pos] == '"': aslist.append('"%s"' % self.getquote()) elif self.field[self.pos] in self.atomends: @@ -661,7 +650,7 @@ return ''.join(aslist) aslist.append('@') - self.pos = self.pos + 1 + self.pos += 1 self.gotonext() return ''.join(aslist) + self.getdomain() @@ -670,13 +659,13 @@ sdlist = [] while self.pos < len(self.field): if self.field[self.pos] in self.LWS: - self.pos = self.pos + 1 + self.pos += 1 elif self.field[self.pos] == '(': self.commentlist.append(self.getcomment()) elif self.field[self.pos] == '[': sdlist.append(self.getdomainliteral()) elif self.field[self.pos] == '.': - self.pos = self.pos + 1 + self.pos += 1 sdlist.append('.') elif self.field[self.pos] in self.atomends: break @@ -701,13 +690,13 @@ slist = [''] quote = 0 - self.pos = self.pos + 1 + self.pos += 1 while self.pos < len(self.field): if quote == 1: slist.append(self.field[self.pos]) quote = 0 elif self.field[self.pos] in endchars: - self.pos = self.pos + 1 + self.pos += 1 break elif allowcomments and self.field[self.pos] == '(': slist.append(self.getcomment()) @@ -715,7 +704,7 @@ quote = 1 else: slist.append(self.field[self.pos]) - self.pos = self.pos + 1 + self.pos += 1 return ''.join(slist) @@ -746,7 +735,7 @@ if self.field[self.pos] in atomends: break else: atomlist.append(self.field[self.pos]) - self.pos = self.pos + 1 + self.pos += 1 return ''.join(atomlist) @@ -761,7 +750,7 @@ while self.pos < len(self.field): if self.field[self.pos] in self.LWS: - self.pos = self.pos + 1 + 
self.pos += 1 elif self.field[self.pos] == '"': plist.append(self.getquote()) elif self.field[self.pos] == '(': @@ -930,16 +919,15 @@ else: tzsign = 1 tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) - tuple = (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset) - return tuple + return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset) def parsedate(data): """Convert a time string to a time tuple.""" t = parsedate_tz(data) - if type(t) == type( () ): - return t[:9] - else: return t + if t is None: + return t + return t[:9] def mktime_tz(data): @@ -965,10 +953,10 @@ timeval = time.time() timeval = time.gmtime(timeval) return "%s, %02d %s %04d %02d:%02d:%02d GMT" % ( - ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][timeval[6]], + ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]], timeval[2], - ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][timeval[1]-1], + ("Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1], timeval[0], timeval[3], timeval[4], timeval[5]) @@ -1002,7 +990,7 @@ m.rewindbody() n = 0 while f.readline(): - n = n + 1 + n += 1 print 'Lines:', n print '-'*70 print 'len =', len(m) Index: sets.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/sets.py,v retrieving revision 1.43.4.2 retrieving revision 1.43.4.3 diff -u -d -r1.43.4.2 -r1.43.4.3 --- sets.py 7 Jan 2005 06:58:09 -0000 1.43.4.2 +++ sets.py 16 Oct 2005 05:23:59 -0000 1.43.4.3 @@ -480,6 +480,8 @@ value = True if not isinstance(other, BaseSet): other = Set(other) + if self is other: + self.clear() for elt in other: if elt in data: del data[elt] @@ -497,6 +499,8 @@ data = self._data if not isinstance(other, BaseSet): other = Set(other) + if self is other: + self.clear() for elt in ifilter(data.has_key, other): del data[elt] Index: shutil.py =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Lib/shutil.py,v retrieving revision 1.22.2.2 retrieving revision 1.22.2.3 diff -u -d -r1.22.2.2 -r1.22.2.3 --- shutil.py 7 Jan 2005 06:58:10 -0000 1.22.2.2 +++ shutil.py 16 Oct 2005 05:23:59 -0000 1.22.2.3 @@ -7,13 +7,12 @@ import os import sys import stat -import exceptions from os.path import abspath __all__ = ["copyfileobj","copyfile","copymode","copystat","copy","copy2", "copytree","move","rmtree","Error"] -class Error(exceptions.EnvironmentError): +class Error(EnvironmentError): pass def copyfileobj(fsrc, fdst, length=16*1024): @@ -108,7 +107,7 @@ """ names = os.listdir(src) - os.mkdir(dst) + os.makedirs(dst) errors = [] for name in names: srcname = os.path.join(src, name) @@ -124,6 +123,11 @@ # XXX What about devices, sockets etc.? except (IOError, os.error), why: errors.append((srcname, dstname, why)) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error, err: + errors.extend(err.args[0]) + copystat(src, dst) if errors: raise Error, errors Index: smtplib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/smtplib.py,v retrieving revision 1.58.2.2 retrieving revision 1.58.2.3 diff -u -d -r1.58.2.2 -r1.58.2.3 --- smtplib.py 7 Jan 2005 06:58:10 -0000 1.58.2.2 +++ smtplib.py 16 Oct 2005 05:23:59 -0000 1.58.2.3 @@ -43,7 +43,7 @@ import socket import re -import rfc822 +import email.Utils import base64 import hmac from email.base64MIME import encode as encode_base64 @@ -171,7 +171,7 @@ """ m = (None, None) try: - m=rfc822.parseaddr(addr)[1] + m = email.Utils.parseaddr(addr)[1] except AttributeError: pass if m == (None, None): # Indicates parse failure or AttributeError @@ -290,10 +290,10 @@ af, socktype, proto, canonname, sa = res try: self.sock = socket.socket(af, socktype, proto) - if self.debuglevel > 0: print>>stderr, 'connect:', (host, port) + if self.debuglevel > 0: print>>stderr, 'connect:', sa 
self.sock.connect(sa) except socket.error, msg: - if self.debuglevel > 0: print>>stderr, 'connect fail:', (host, port) + if self.debuglevel > 0: print>>stderr, 'connect fail:', msg if self.sock: self.sock.close() self.sock = None @@ -439,7 +439,7 @@ """SMTP 'help' command. Returns help text from server.""" self.putcmd("help", args) - return self.getreply() + return self.getreply()[1] def rset(self): """SMTP 'rset' command -- resets session.""" @@ -578,7 +578,7 @@ (code, resp) = self.docmd(encode_base64(password, eol="")) elif authmethod is None: raise SMTPException("No suitable authentication method found.") - if code not in [235, 503]: + if code not in (235, 503): # 235 == 'Authentication successful' # 503 == 'Error: already authenticated' raise SMTPAuthenticationError(code, resp) Index: socket.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/socket.py,v retrieving revision 1.21.2.2 retrieving revision 1.21.2.3 diff -u -d -r1.21.2.2 -r1.21.2.3 --- socket.py 7 Jan 2005 06:58:10 -0000 1.21.2.2 +++ socket.py 16 Oct 2005 05:23:59 -0000 1.21.2.3 @@ -102,7 +102,7 @@ First the hostname returned by gethostbyaddr() is checked, then possibly existing aliases. In case no FQDN is available, hostname - is returned. + from gethostname() is returned. 
""" name = name.strip() if not name or name == '0.0.0.0': Index: sre.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v retrieving revision 1.44.10.2 retrieving revision 1.44.10.3 diff -u -d -r1.44.10.2 -r1.44.10.3 --- sre.py 7 Jan 2005 06:58:10 -0000 1.44.10.2 +++ sre.py 16 Oct 2005 05:23:59 -0000 1.44.10.3 @@ -188,12 +188,18 @@ "Compile a template pattern, returning a pattern object" return _compile(pattern, flags|T) +_alphanum = {} +for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890': + _alphanum[c] = 1 +del c + def escape(pattern): "Escape all non-alphanumeric characters in pattern." s = list(pattern) + alphanum = _alphanum for i in range(len(pattern)): c = pattern[i] - if not ("a" <= c <= "z" or "A" <= c <= "Z" or "0" <= c <= "9"): + if c not in alphanum: if c == "\000": s[i] = "\\000" else: Index: sre_compile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v retrieving revision 1.43.2.2 retrieving revision 1.43.2.3 diff -u -d -r1.43.2.2 -r1.43.2.3 --- sre_compile.py 7 Jan 2005 06:58:10 -0000 1.43.2.2 +++ sre_compile.py 16 Oct 2005 05:23:59 -0000 1.43.2.3 @@ -24,14 +24,25 @@ def _identityfunction(x): return x +def set(seq): + s = {} + for elem in seq: + s[elem] = 1 + return s + +_LITERAL_CODES = set([LITERAL, NOT_LITERAL]) +_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT]) +_SUCCESS_CODES = set([SUCCESS, FAILURE]) +_ASSERT_CODES = set([ASSERT, ASSERT_NOT]) + def _compile(code, pattern, flags): # internal: compile a (sub)pattern emit = code.append _len = len - LITERAL_CODES = {LITERAL:1, NOT_LITERAL:1} - REPEATING_CODES = {REPEAT:1, MIN_REPEAT:1, MAX_REPEAT:1} - SUCCESS_CODES = {SUCCESS:1, FAILURE:1} - ASSERT_CODES = {ASSERT:1, ASSERT_NOT:1} + LITERAL_CODES = _LITERAL_CODES + REPEATING_CODES = _REPEATING_CODES + SUCCESS_CODES = _SUCCESS_CODES + ASSERT_CODES = _ASSERT_CODES 
for op, av in pattern: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: @@ -156,7 +167,7 @@ emit(av-1) elif op is GROUPREF_EXISTS: emit(OPCODES[op]) - emit((av[0]-1)*2) + emit(av[0]-1) skipyes = _len(code); emit(0) _compile(code, av[1], flags) if av[2]: Index: sre_parse.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v retrieving revision 1.55.2.2 retrieving revision 1.55.2.3 diff -u -d -r1.55.2.2 -r1.55.2.3 --- sre_parse.py 7 Jan 2005 06:58:10 -0000 1.55.2.2 +++ sre_parse.py 16 Oct 2005 05:23:59 -0000 1.55.2.3 @@ -16,15 +16,21 @@ from sre_constants import * +def set(seq): + s = {} + for elem in seq: + s[elem] = 1 + return s + SPECIAL_CHARS = ".\\[{()*+?^$|" REPEAT_CHARS = "*+?{" -DIGITS = tuple("0123456789") +DIGITS = set("0123456789") -OCTDIGITS = tuple("01234567") -HEXDIGITS = tuple("0123456789abcdefABCDEF") +OCTDIGITS = set("01234567") +HEXDIGITS = set("0123456789abcdefABCDEF") -WHITESPACE = tuple(" \t\n\r\v\f") +WHITESPACE = set(" \t\n\r\v\f") ESCAPES = { r"\a": (LITERAL, ord("\a")), @@ -371,6 +377,11 @@ subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) return subpattern +_PATTERNENDERS = set("|)") +_ASSERTCHARS = set("=!<") +_LOOKBEHINDASSERTCHARS = set("=!") +_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT]) + def _parse(source, state): # parse a simple pattern subpattern = SubPattern(state) @@ -380,10 +391,10 @@ sourceget = source.get sourcematch = source.match _len = len - PATTERNENDERS = ("|", ")") - ASSERTCHARS = ("=", "!", "<") - LOOKBEHINDASSERTCHARS = ("=", "!") - REPEATCODES = (MIN_REPEAT, MAX_REPEAT) + PATTERNENDERS = _PATTERNENDERS + ASSERTCHARS = _ASSERTCHARS + LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS + REPEATCODES = _REPEATCODES while 1: @@ -474,6 +485,9 @@ elif this == "+": min, max = 1, MAXREPEAT elif this == "{": + if source.next == "}": + subpatternappend((LITERAL, ord(this))) + continue here = source.tell() min, max = 0, MAXREPEAT lo 
= hi = "" Index: subprocess.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/subprocess.py,v retrieving revision 1.13.2.1 retrieving revision 1.13.2.2 diff -u -d -r1.13.2.1 -r1.13.2.2 --- subprocess.py 7 Jan 2005 06:58:10 -0000 1.13.2.1 +++ subprocess.py 16 Oct 2005 05:23:59 -0000 1.13.2.2 @@ -2,28 +2,12 @@ # # For more information about this module, see PEP 324. # -# Copyright (c) 2003-2004 by Peter Astrand -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: +# This module should remain compatible with Python 2.2, see PEP 291. # -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of the -# author not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. +# Copyright (c) 2003-2005 by Peter Astrand # -# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION -# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/2.4/license for licensing details. 
r"""subprocess - Subprocesses with accessible I/O streams @@ -528,6 +512,7 @@ result.extend(bs_buf) if needquote: + result.extend(bs_buf) result.append('"') return ''.join(result) @@ -615,6 +600,33 @@ data = data.replace("\r", "\n") return data + def communicate(self, input=None): + """Interact with process: Send data to stdin. Read data from + stdout and stderr, until end-of-file is reached. Wait for + process to terminate. The optional input argument should be a + string to be sent to the child process, or None, if no data + should be sent to the child. + + communicate() returns a tuple (stdout, stderr).""" + + # Optimization: If we are only using one pipe, or no pipe at + # all, using select() or threads is unnecessary. + if [self.stdin, self.stdout, self.stderr].count(None) >= 2: + stdout = None + stderr = None + if self.stdin: + if input: + self.stdin.write(input) + self.stdin.close() + elif self.stdout: + stdout = self.stdout.read() + elif self.stderr: + stderr = self.stderr.read() + self.wait() + return (stdout, stderr) + + return self._communicate(input) + if mswindows: # @@ -624,42 +636,42 @@ """Construct and return tupel with IO objects: p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite """ - if stdin == None and stdout == None and stderr == None: + if stdin is None and stdout is None and stderr is None: return (None, None, None, None, None, None) p2cread, p2cwrite = None, None c2pread, c2pwrite = None, None errread, errwrite = None, None - if stdin == None: + if stdin is None: p2cread = GetStdHandle(STD_INPUT_HANDLE) elif stdin == PIPE: p2cread, p2cwrite = CreatePipe(None, 0) # Detach and turn into fd p2cwrite = p2cwrite.Detach() p2cwrite = msvcrt.open_osfhandle(p2cwrite, 0) - elif type(stdin) == types.IntType: + elif isinstance(stdin, int): p2cread = msvcrt.get_osfhandle(stdin) else: # Assuming file-like object p2cread = msvcrt.get_osfhandle(stdin.fileno()) p2cread = self._make_inheritable(p2cread) - if stdout == None: + if stdout is None: 
c2pwrite = GetStdHandle(STD_OUTPUT_HANDLE) elif stdout == PIPE: c2pread, c2pwrite = CreatePipe(None, 0) # Detach and turn into fd c2pread = c2pread.Detach() c2pread = msvcrt.open_osfhandle(c2pread, 0) - elif type(stdout) == types.IntType: + elif isinstance(stdout, int): c2pwrite = msvcrt.get_osfhandle(stdout) else: # Assuming file-like object c2pwrite = msvcrt.get_osfhandle(stdout.fileno()) c2pwrite = self._make_inheritable(c2pwrite) - if stderr == None: + if stderr is None: errwrite = GetStdHandle(STD_ERROR_HANDLE) elif stderr == PIPE: errread, errwrite = CreatePipe(None, 0) @@ -668,7 +680,7 @@ errread = msvcrt.open_osfhandle(errread, 0) elif stderr == STDOUT: errwrite = c2pwrite - elif type(stderr) == types.IntType: + elif isinstance(stderr, int): errwrite = msvcrt.get_osfhandle(stderr) else: # Assuming file-like object @@ -716,7 +728,7 @@ # Process startup details default_startupinfo = STARTUPINFO() - if startupinfo == None: + if startupinfo is None: startupinfo = default_startupinfo if not None in (p2cread, c2pwrite, errwrite): startupinfo.dwFlags |= STARTF_USESTDHANDLES @@ -775,18 +787,18 @@ # output pipe are maintained in this process or else the # pipe will not close when the child process exits and the # ReadFile will hang. - if p2cread != None: + if p2cread is not None: p2cread.Close() - if c2pwrite != None: + if c2pwrite is not None: c2pwrite.Close() - if errwrite != None: + if errwrite is not None: errwrite.Close() def poll(self): """Check if child process has terminated. Returns returncode attribute.""" - if self.returncode == None: + if self.returncode is None: if WaitForSingleObject(self._handle, 0) == WAIT_OBJECT_0: self.returncode = GetExitCodeProcess(self._handle) _active.remove(self) @@ -796,7 +808,7 @@ def wait(self): """Wait for child process to terminate. 
Returns returncode attribute.""" - if self.returncode == None: + if self.returncode is None: obj = WaitForSingleObject(self._handle, INFINITE) self.returncode = GetExitCodeProcess(self._handle) _active.remove(self) @@ -807,14 +819,7 @@ buffer.append(fh.read()) - def communicate(self, input=None): - """Interact with process: Send data to stdin. Read data from - stdout and stderr, until end-of-file is reached. Wait for - process to terminate. The optional input argument should be a - string to be sent to the child process, or None, if no data - should be sent to the child. - - communicate() returns a tuple (stdout, stderr).""" + def _communicate(self, input): stdout = None # Return stderr = None # Return @@ -832,7 +837,7 @@ stderr_thread.start() if self.stdin: - if input != None: + if input is not None: self.stdin.write(input) self.stdin.close() @@ -842,9 +847,9 @@ stderr_thread.join() # All data exchanged. Translate lists into strings. - if stdout != None: + if stdout is not None: stdout = stdout[0] - if stderr != None: + if stderr is not None: stderr = stderr[0] # Translate newlines, if requested. 
We cannot let the file @@ -872,33 +877,33 @@ c2pread, c2pwrite = None, None errread, errwrite = None, None - if stdin == None: + if stdin is None: pass elif stdin == PIPE: p2cread, p2cwrite = os.pipe() - elif type(stdin) == types.IntType: + elif isinstance(stdin, int): p2cread = stdin else: # Assuming file-like object p2cread = stdin.fileno() - if stdout == None: + if stdout is None: pass elif stdout == PIPE: c2pread, c2pwrite = os.pipe() - elif type(stdout) == types.IntType: + elif isinstance(stdout, int): c2pwrite = stdout else: # Assuming file-like object c2pwrite = stdout.fileno() - if stderr == None: + if stderr is None: pass elif stderr == PIPE: errread, errwrite = os.pipe() elif stderr == STDOUT: errwrite = c2pwrite - elif type(stderr) == types.IntType: + elif isinstance(stderr, int): errwrite = stderr else: # Assuming file-like object @@ -943,7 +948,7 @@ if shell: args = ["/bin/sh", "-c"] + args - if executable == None: + if executable is None: executable = args[0] # For transferring possible exec failure from child to parent @@ -986,13 +991,13 @@ if close_fds: self._close_fds(but=errpipe_write) - if cwd != None: + if cwd is not None: os.chdir(cwd) if preexec_fn: apply(preexec_fn) - if env == None: + if env is None: os.execvp(executable, args) else: os.execvpe(executable, args, env) @@ -1043,7 +1048,7 @@ def poll(self): """Check if child process has terminated. Returns returncode attribute.""" - if self.returncode == None: + if self.returncode is None: try: pid, sts = os.waitpid(self.pid, os.WNOHANG) if pid == self.pid: @@ -1056,20 +1061,13 @@ def wait(self): """Wait for child process to terminate. Returns returncode attribute.""" - if self.returncode == None: + if self.returncode is None: pid, sts = os.waitpid(self.pid, 0) self._handle_exitstatus(sts) return self.returncode - def communicate(self, input=None): - """Interact with process: Send data to stdin. Read data from - stdout and stderr, until end-of-file is reached. Wait for - process to terminate. 
The optional input argument should be a - string to be sent to the child process, or None, if no data - should be sent to the child. - - communicate() returns a tuple (stdout, stderr).""" + def _communicate(self, input): read_set = [] write_set = [] stdout = None # Return @@ -1118,9 +1116,9 @@ stderr.append(data) # All data exchanged. Translate lists into strings. - if stdout != None: + if stdout is not None: stdout = ''.join(stdout) - if stderr != None: + if stderr is not None: stderr = ''.join(stderr) # Translate newlines, if requested. We cannot let the file Index: symbol.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/symbol.py,v retrieving revision 1.14.12.2 retrieving revision 1.14.12.3 diff -u -d -r1.14.12.2 -r1.14.12.3 --- symbol.py 7 Jan 2005 06:58:10 -0000 1.14.12.2 +++ symbol.py 16 Oct 2005 05:23:59 -0000 1.14.12.3 @@ -88,6 +88,7 @@ gen_if = 331 testlist1 = 332 encoding_decl = 333 +yield_expr = 334 #--end constants-- sym_name = {} Index: tarfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/tarfile.py,v retrieving revision 1.8.4.2 retrieving revision 1.8.4.3 diff -u -d -r1.8.4.2 -r1.8.4.3 --- tarfile.py 7 Jan 2005 06:58:10 -0000 1.8.4.2 +++ tarfile.py 16 Oct 2005 05:23:59 -0000 1.8.4.3 @@ -274,7 +274,7 @@ _Stream is intended to be used only internally. """ - def __init__(self, name, mode, type, fileobj, bufsize): + def __init__(self, name, mode, comptype, fileobj, bufsize): """Construct a _Stream object. 
""" self._extfileobj = True @@ -282,16 +282,22 @@ fileobj = _LowLevelFile(name, mode) self._extfileobj = False - self.name = name or "" - self.mode = mode - self.type = type - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = "" - self.pos = 0L - self.closed = False + if comptype == '*': + # Enable transparent compression detection for the + # stream interface + fileobj = _StreamProxy(fileobj) + comptype = fileobj.getcomptype() - if type == "gz": + self.name = name or "" + self.mode = mode + self.comptype = comptype + self.fileobj = fileobj + self.bufsize = bufsize + self.buf = "" + self.pos = 0L + self.closed = False + + if comptype == "gz": try: import zlib except ImportError: @@ -303,7 +309,7 @@ else: self._init_write_gz() - if type == "bz2": + if comptype == "bz2": try: import bz2 except ImportError: @@ -315,7 +321,7 @@ self.cmp = bz2.BZ2Compressor() def __del__(self): - if not self.closed: + if hasattr(self, "closed") and not self.closed: self.close() def _init_write_gz(self): @@ -334,10 +340,10 @@ def write(self, s): """Write string s to the stream. """ - if self.type == "gz": + if self.comptype == "gz": self.crc = self.zlib.crc32(s, self.crc) self.pos += len(s) - if self.type != "tar": + if self.comptype != "tar": s = self.cmp.compress(s) self.__write(s) @@ -357,12 +363,16 @@ if self.closed: return - if self.mode == "w" and self.type != "tar": + if self.mode == "w" and self.comptype != "tar": self.buf += self.cmp.flush() + if self.mode == "w" and self.buf: + blocks, remainder = divmod(len(self.buf), self.bufsize) + if remainder > 0: + self.buf += NUL * (self.bufsize - remainder) self.fileobj.write(self.buf) self.buf = "" - if self.type == "gz": + if self.comptype == "gz": self.fileobj.write(struct.pack("" % (self.__class__.__name__,self.name,id(self)) + @classmethod def frombuf(cls, buf): """Construct a TarInfo object from a 512 byte string buffer. 
""" @@ -699,8 +750,6 @@ tarinfo.name += "/" return tarinfo - frombuf = classmethod(frombuf) - def tobuf(self): """Return a tar header block as a 512 byte string. """ @@ -858,12 +907,13 @@ # the super-constructor. A sub-constructor is registered and made available # by adding it to the mapping in OPEN_METH. + @classmethod def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512): """Open a tar archive for reading, writing or appending. Return an appropriate TarFile class. mode: - 'r' open for reading with transparent compression + 'r' or 'r:*' open for reading with transparent compression 'r:' open for reading exclusively uncompressed 'r:gz' open for reading with gzip compression 'r:bz2' open for reading with bzip2 compression @@ -871,6 +921,8 @@ 'w' or 'w:' open for writing without compression 'w:gz' open for writing with gzip compression 'w:bz2' open for writing with bzip2 compression + + 'r|*' open a stream of tar blocks with transparent compression 'r|' open an uncompressed stream of tar blocks for reading 'r|gz' open a gzip compressed stream of tar blocks 'r|bz2' open a bzip2 compressed stream of tar blocks @@ -882,7 +934,17 @@ if not name and not fileobj: raise ValueError, "nothing to open" - if ":" in mode: + if mode in ("r", "r:*"): + # Find out which *open() is appropriate for opening the file. + for comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + try: + return func(name, "r", fileobj) + except (ReadError, CompressionError): + continue + raise ReadError, "file could not be opened successfully" + + elif ":" in mode: filemode, comptype = mode.split(":", 1) filemode = filemode or "r" comptype = comptype or "tar" @@ -908,23 +970,12 @@ t._extfileobj = False return t - elif mode == "r": - # Find out which *open() is appropriate for opening the file. 
- for comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - try: - return func(name, "r", fileobj) - except (ReadError, CompressionError): - continue - raise ReadError, "file could not be opened successfully" - elif mode in "aw": return cls.taropen(name, mode, fileobj) raise ValueError, "undiscernible mode" - open = classmethod(open) - + @classmethod def taropen(cls, name, mode="r", fileobj=None): """Open uncompressed tar archive name for reading or writing. """ @@ -932,8 +983,7 @@ raise ValueError, "mode must be 'r', 'a' or 'w'" return cls(name, mode, fileobj) - taropen = classmethod(taropen) - + @classmethod def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9): """Open gzip compressed tar archive name for reading or writing. Appending is not allowed. @@ -970,8 +1020,7 @@ t._extfileobj = False return t - gzopen = classmethod(gzopen) - + @classmethod def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9): """Open bzip2 compressed tar archive name for reading or writing. Appending is not allowed. @@ -1002,8 +1051,6 @@ t._extfileobj = False return t - bz2open = classmethod(bz2open) - # All *open() methods are registered here. OPEN_METH = { "tar": "taropen", # uncompressed tar @@ -1132,17 +1179,16 @@ # Fill the TarInfo object with all # information we can get. 
- tarinfo.name = arcname - tarinfo.mode = stmd - tarinfo.uid = statres.st_uid - tarinfo.gid = statres.st_gid - if stat.S_ISDIR(stmd): - # For a directory, the size must be 0 - tarinfo.size = 0 - else: + tarinfo.name = arcname + tarinfo.mode = stmd + tarinfo.uid = statres.st_uid + tarinfo.gid = statres.st_gid + if stat.S_ISREG(stmd): tarinfo.size = statres.st_size + else: + tarinfo.size = 0L tarinfo.mtime = statres.st_mtime - tarinfo.type = type + tarinfo.type = type tarinfo.linkname = linkname if pwd: try: @@ -1233,16 +1279,15 @@ self.addfile(tarinfo, f) f.close() - if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE): - tarinfo.size = 0L - self.addfile(tarinfo) - - if tarinfo.isdir(): + elif tarinfo.isdir(): self.addfile(tarinfo) if recursive: for f in os.listdir(name): self.add(os.path.join(name, f), os.path.join(arcname, f)) + else: + self.addfile(tarinfo) + def addfile(self, tarinfo, fileobj=None): """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is given, tarinfo.size bytes are read from it and added to the archive. @@ -1310,6 +1355,47 @@ self.members.append(tarinfo) + def extractall(self, path=".", members=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). + """ + directories = [] + + if members is None: + members = self + + for tarinfo in members: + if tarinfo.isdir(): + # Extract directory with a safe mode, so that + # all files below can be extracted as well. + try: + os.makedirs(os.path.join(path, tarinfo.name), 0777) + except EnvironmentError: + pass + directories.append(tarinfo) + else: + self.extract(tarinfo, path) + + # Reverse sort directories. 
+ directories.sort(lambda a, b: cmp(a.name, b.name)) + directories.reverse() + + # Set correct owner, mtime and filemode on directories. + for tarinfo in directories: + path = os.path.join(path, tarinfo.name) + try: + self.chown(tarinfo, path) + self.utime(tarinfo, path) + self.chmod(tarinfo, path) + except ExtractError, e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + def extract(self, member, path=""): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately @@ -1374,7 +1460,7 @@ # stream of tar blocks. raise StreamError, "cannot extract (sym)link as file object" else: - # A (sym)link's file object is it's target's file object. + # A (sym)link's file object is its target's file object. return self.extractfile(self._getmember(tarinfo.linkname, tarinfo)) else: @@ -1840,6 +1926,7 @@ """Construct a TarIter object. """ self.tarfile = tarfile + self.index = 0 def __iter__(self): """Return iterator object. """ @@ -1848,10 +1935,20 @@ """Return the next item using TarFile's next() method. When all members have been read, set TarFile as _loaded. """ - tarinfo = self.tarfile.next() - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration + # Fix for SF #1100429: Under rare circumstances it can + # happen that getmembers() is called during iteration, + # which will cause TarIter to stop prematurely. 
+ if not self.tarfile._loaded: + tarinfo = self.tarfile.next() + if not tarinfo: + self.tarfile._loaded = True + raise StopIteration + else: + try: + tarinfo = self.tarfile.members[self.index] + except IndexError: + raise StopIteration + self.index += 1 return tarinfo # Helper classes for sparse file support @@ -1915,8 +2012,7 @@ raise ValueError, "unknown compression constant" if mode[0:1] == "r": members = self.tarfile.getmembers() - for i in xrange(len(members)): - m = members[i] + for m in members: m.filename = m.name m.file_size = m.size m.date_time = time.gmtime(m.mtime)[:6] Index: telnetlib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/telnetlib.py,v retrieving revision 1.19.2.2 retrieving revision 1.19.2.3 diff -u -d -r1.19.2.2 -r1.19.2.3 --- telnetlib.py 7 Jan 2005 06:58:11 -0000 1.19.2.2 +++ telnetlib.py 16 Oct 2005 05:23:59 -0000 1.19.2.3 @@ -1,4 +1,4 @@ -"""TELNET client class. +r"""TELNET client class. Based on RFC 854: TELNET Protocol Specification, by J. Postel and J. Reynolds Index: tempfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/tempfile.py,v retrieving revision 1.39.2.2 retrieving revision 1.39.2.3 diff -u -d -r1.39.2.2 -r1.39.2.3 --- tempfile.py 7 Jan 2005 06:58:11 -0000 1.39.2.2 +++ tempfile.py 16 Oct 2005 05:23:59 -0000 1.39.2.3 @@ -414,9 +414,9 @@ 'bufsize' -- the buffer size argument to os.fdopen (default -1). The file is created as mkstemp() would do it. - Returns a file object; the name of the file is accessible as - file.name. The file will be automatically deleted when it is - closed. + Returns an object with a file-like interface; the name of the file + is accessible as file.name. The file will be automatically deleted + when it is closed. """ if dir is None: @@ -451,8 +451,8 @@ 'bufsize' -- the buffer size argument to os.fdopen (default -1). The file is created as mkstemp() would do it. 
- Returns a file object. The file has no name, and will cease to - exist when it is closed. + Returns an object with a file-like interface. The file has no + name, and will cease to exist when it is closed. """ if dir is None: Index: textwrap.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/textwrap.py,v retrieving revision 1.12.2.2 retrieving revision 1.12.2.3 diff -u -d -r1.12.2.2 -r1.12.2.3 --- textwrap.py 7 Jan 2005 06:58:11 -0000 1.12.2.2 +++ textwrap.py 16 Oct 2005 05:23:59 -0000 1.12.2.3 @@ -78,9 +78,10 @@ # splits into # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! # (after stripping out empty strings). - wordsep_re = re.compile(r'(\s+|' # any whitespace - r'[^\s\w]*\w{2,}-(?=\w{2,})|' # hyphenated words - r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash + wordsep_re = re.compile( + r'(\s+|' # any whitespace + r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words + r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash # XXX this is not locale- or charset-aware -- string.lowercase # is US-ASCII only (and therefore English-only) @@ -160,7 +161,7 @@ else: i += 1 - def _handle_long_word(self, chunks, cur_line, cur_len, width): + def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): """_handle_long_word(chunks : [string], cur_line : [string], cur_len : int, width : int) @@ -173,14 +174,14 @@ # If we're allowed to break long words, then do so: put as much # of the next chunk onto the current line as will fit. if self.break_long_words: - cur_line.append(chunks[0][0:space_left]) - chunks[0] = chunks[0][space_left:] + cur_line.append(reversed_chunks[-1][:space_left]) + reversed_chunks[-1] = reversed_chunks[-1][space_left:] # Otherwise, we have to preserve the long word intact. Only add # it to the current line if there's nothing already there -- # that minimizes how much we violate the width constraint. 
elif not cur_line: - cur_line.append(chunks.pop(0)) + cur_line.append(reversed_chunks.pop()) # If we're not allowed to break long words, and there's already # text on the current line, do nothing. Next time through the @@ -205,6 +206,10 @@ if self.width <= 0: raise ValueError("invalid width %r (must be > 0)" % self.width) + # Arrange in reverse order so items can be efficiently popped + # from a stack of chucks. + chunks.reverse() + while chunks: # Start the list of chunks that will make up the current line. @@ -223,15 +228,15 @@ # First chunk on line is whitespace -- drop it, unless this # is the very beginning of the text (ie. no lines started yet). - if chunks[0].strip() == '' and lines: - del chunks[0] + if chunks[-1].strip() == '' and lines: + del chunks[-1] while chunks: - l = len(chunks[0]) + l = len(chunks[-1]) # Can at least squeeze this chunk onto the current line. if cur_len + l <= width: - cur_line.append(chunks.pop(0)) + cur_line.append(chunks.pop()) cur_len += l # Nope, this line is full. @@ -240,7 +245,7 @@ # The current line is full, and the next chunk is too big to # fit on *any* line (not just this one). - if chunks and len(chunks[0]) > width: + if chunks and len(chunks[-1]) > width: self._handle_long_word(chunks, cur_line, cur_len, width) # If the last chunk on this line is all whitespace, drop it. 
Index: threading.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/threading.py,v retrieving revision 1.24.2.2 retrieving revision 1.24.2.3 diff -u -d -r1.24.2.2 -r1.24.2.3 --- threading.py 7 Jan 2005 06:58:11 -0000 1.24.2.2 +++ threading.py 16 Oct 2005 05:23:59 -0000 1.24.2.3 @@ -102,7 +102,7 @@ self.__owner = me self.__count = 1 if __debug__: - self._note("%s.acquire(%s): initial succes", self, blocking) + self._note("%s.acquire(%s): initial success", self, blocking) else: if __debug__: self._note("%s.acquire(%s): failure", self, blocking) @@ -358,7 +358,7 @@ # Active thread administration _active_limbo_lock = _allocate_lock() -_active = {} +_active = {} # maps thread id to Thread object _limbo = {} @@ -374,9 +374,11 @@ __exc_info = _sys.exc_info def __init__(self, group=None, target=None, name=None, - args=(), kwargs={}, verbose=None): + args=(), kwargs=None, verbose=None): assert group is None, "group argument must be None for now" _Verbose.__init__(self, verbose) + if kwargs is None: + kwargs = {} self.__target = target self.__name = str(name or _newname()) self.__args = args @@ -643,8 +645,9 @@ # Dummy thread class to represent threads not started here. -# These aren't garbage collected when they die, -# nor can they be waited for. +# These aren't garbage collected when they die, nor can they be waited for. +# If they invoke anything in threading.py that calls currentThread(), they +# leave an entry in the _active dict forever after. # Their purpose is to return *something* from currentThread(). # They are marked as daemon threads so we won't wait for them # when we exit (conform previous semantics). @@ -653,6 +656,12 @@ def __init__(self): Thread.__init__(self, name=_newname("Dummy-%d")) + + # Thread.__block consumes an OS-level locking primitive, which + # can never be used by a _DummyThread. Since a _DummyThread + # instance is immortal, that's bad, so release this resource. 
+ del self._Thread__block + self._Thread__started = True _active_limbo_lock.acquire() _active[_get_ident()] = self Index: tokenize.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/tokenize.py,v retrieving revision 1.32.2.2 retrieving revision 1.32.2.3 diff -u -d -r1.32.2.2 -r1.32.2.3 --- tokenize.py 7 Jan 2005 06:58:11 -0000 1.32.2.2 +++ tokenize.py 16 Oct 2005 05:23:59 -0000 1.32.2.3 @@ -31,7 +31,7 @@ import token __all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize", - "generate_tokens", "NL"] + "generate_tokens", "NL", "untokenize"] del x del token @@ -159,12 +159,55 @@ for token_info in generate_tokens(readline): tokeneater(*token_info) + +def untokenize(iterable): + """Transform tokens back into Python source code. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. + + Round-trip invariant: + # Output text will tokenize the back to the input + t1 = [tok[:2] for tok in generate_tokens(f.readline)] + newcode = untokenize(t1) + readline = iter(newcode.splitlines(1)).next + t2 = [tok[:2] for tokin generate_tokens(readline)] + assert t1 == t2 + """ + + startline = False + indents = [] + toks = [] + toks_append = toks.append + for tok in iterable: + toknum, tokval = tok[:2] + + if toknum == NAME: + tokval += ' ' + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, COMMENT, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + return ''.join(toks) + + def generate_tokens(readline): """ The generate_tokens() generator requires one argment, readline, which must be a callable object which provides the same interface as the readline() method of built-in file objects. Each call to the function - should return one line of input as a string. 
+ should return one line of input as a string. Alternately, readline + can be a callable function terminating with StopIteration: + readline = open(myfile).next # Example of alternate readline The generator produces 5-tuples with these members: the token type; the token string; a 2-tuple (srow, scol) of ints specifying the row and @@ -180,7 +223,10 @@ indents = [0] while 1: # loop over lines in stream - line = readline() + try: + line = readline() + except StopIteration: + line = '' lnum = lnum + 1 pos, max = 0, len(line) @@ -225,6 +271,9 @@ indents.append(column) yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) while column < indents[-1]: + if column not in indents: + raise IndentationError( + "unindent does not match any outer indentation level") indents = indents[:-1] yield (DEDENT, '', (lnum, pos), (lnum, pos), line) Index: unittest.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/unittest.py,v retrieving revision 1.16.2.2 retrieving revision 1.16.2.3 diff -u -d -r1.16.2.2 -r1.16.2.3 --- unittest.py 7 Jan 2005 06:58:11 -0000 1.16.2.2 +++ unittest.py 16 Oct 2005 05:23:59 -0000 1.16.2.3 @@ -71,7 +71,7 @@ False, True = 0, 1 def isinstance(obj, clsinfo): import __builtin__ - if type(clsinfo) in (types.TupleType, types.ListType): + if type(clsinfo) in (tuple, list): for cls in clsinfo: if cls is type: cls = types.ClassType if __builtin__.isinstance(obj, cls): Index: urllib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/urllib.py,v retrieving revision 1.148.2.2 retrieving revision 1.148.2.3 diff -u -d -r1.148.2.2 -r1.148.2.3 --- urllib.py 7 Jan 2005 06:58:11 -0000 1.148.2.2 +++ urllib.py 16 Oct 2005 05:23:59 -0000 1.148.2.3 @@ -86,6 +86,11 @@ if _urlopener: _urlopener.cleanup() +# exception raised when downloaded size does not match content-length +class ContentTooShortError(IOError): + def __init__(self, message, content): 
+ IOError.__init__(self, message) + self.content = content ftpcache = {} class URLopener: @@ -228,24 +233,31 @@ self.tempcache[url] = result bs = 1024*8 size = -1 - blocknum = 1 + read = 0 + blocknum = 0 if reporthook: if "content-length" in headers: size = int(headers["Content-Length"]) - reporthook(0, bs, size) - block = fp.read(bs) - if reporthook: - reporthook(1, bs, size) - while block: - tfp.write(block) + reporthook(blocknum, bs, size) + while 1: block = fp.read(bs) - blocknum = blocknum + 1 + if block == "": + break + read += len(block) + tfp.write(block) + blocknum += 1 if reporthook: reporthook(blocknum, bs, size) fp.close() tfp.close() del fp del tfp + + # raise exception if actual size does not match content-length header + if size >= 0 and read < size: + raise ContentTooShortError("retrieval incomplete: got only %i out " + "of %i bytes" % (read, size), result) + return result # Each method named open_ knows how to open that type of URL @@ -1037,23 +1049,18 @@ return selector[1], selector[2:] return None, selector +_hextochr = dict(('%02x' % i, chr(i)) for i in range(256)) +_hextochr.update(('%02X' % i, chr(i)) for i in range(256)) + def unquote(s): """unquote('abc%20def') -> 'abc def'.""" - mychr = chr - myatoi = int - list = s.split('%') - res = [list[0]] - myappend = res.append - del list[0] - for item in list: - if item[1:2]: - try: - myappend(mychr(myatoi(item[:2], 16)) - + item[2:]) - except ValueError: - myappend('%' + item) - else: - myappend('%' + item) + res = s.split('%') + for i in xrange(1, len(res)): + item = res[i] + try: + res[i] = _hextochr[item[:2]] + item[2:] + except KeyError: + res[i] = '%' + item return "".join(res) def unquote_plus(s): @@ -1064,22 +1071,7 @@ always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' '0123456789' '_.-') - -_fast_safe_test = always_safe + '/' -_fast_safe = None - -def _fast_quote(s): - global _fast_safe - if _fast_safe is None: - _fast_safe = {} - for c in _fast_safe_test: - 
_fast_safe[c] = c - res = list(s) - for i in range(len(res)): - c = res[i] - if not c in _fast_safe: - res[i] = '%%%02X' % ord(c) - return ''.join(res) +_safemaps = {} def quote(s, safe = '/'): """quote('abc def') -> 'abc%20def' @@ -1102,25 +1094,25 @@ called on a path where the existing slash characters are used as reserved characters. """ - safe = always_safe + safe - if _fast_safe_test == safe: - return _fast_quote(s) - res = list(s) - for i in range(len(res)): - c = res[i] - if c not in safe: - res[i] = '%%%02X' % ord(c) + cachekey = (safe, always_safe) + try: + safe_map = _safemaps[cachekey] + except KeyError: + safe += always_safe + safe_map = {} + for i in range(256): + c = chr(i) + safe_map[c] = (c in safe) and c or ('%%%02X' % i) + _safemaps[cachekey] = safe_map + res = map(safe_map.__getitem__, s) return ''.join(res) def quote_plus(s, safe = ''): """Quote the query fragment of a URL; replacing ' ' with '+'""" if ' ' in s: - l = s.split(' ') - for i in range(len(l)): - l[i] = quote(l[i], safe) - return '+'.join(l) - else: - return quote(s, safe) + s = quote(s, safe + ' ') + return s.replace(' ', '+') + return quote(s, safe) def urlencode(query,doseq=0): """Encode a sequence of two-element tuples or dictionary into a URL query string. 
Index: urllib2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/urllib2.py,v retrieving revision 1.31.2.2 retrieving revision 1.31.2.3 diff -u -d -r1.31.2.2 -r1.31.2.3 --- urllib2.py 7 Jan 2005 06:58:11 -0000 1.31.2.2 +++ urllib2.py 16 Oct 2005 05:23:59 -0000 1.31.2.3 @@ -277,8 +277,8 @@ class OpenerDirector: def __init__(self): - server_version = "Python-urllib/%s" % __version__ - self.addheaders = [('User-agent', server_version)] + client_version = "Python-urllib/%s" % __version__ + self.addheaders = [('User-agent', client_version)] # manage the individual handlers self.handlers = [] self.handle_open = {} @@ -304,10 +304,13 @@ self.handle_error[protocol] = lookup elif condition == "open": kind = protocol - lookup = getattr(self, "handle_"+condition) - elif condition in ["response", "request"]: + lookup = self.handle_open + elif condition == "response": kind = protocol - lookup = getattr(self, "process_"+condition) + lookup = self.process_response + elif condition == "request": + kind = protocol + lookup = self.process_request else: continue @@ -381,7 +384,7 @@ 'unknown_open', req) def error(self, proto, *args): - if proto in ['http', 'https']: + if proto in ('http', 'https'): # XXX http[s] protocols are special-cased dict = self.handle_error['http'] # https is not different than http proto = args[2] # YUCK! 
@@ -582,7 +585,7 @@ if ':' in user_pass: user, password = user_pass.split(':', 1) user_pass = base64.encodestring('%s:%s' % (unquote(user), - unquote(password))) + unquote(password))).strip() req.add_header('Proxy-authorization', 'Basic ' + user_pass) host = unquote(host) req.set_proxy(host, type) @@ -859,7 +862,7 @@ entdig = None A1 = "%s:%s:%s" % (user, realm, pw) - A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET', + A2 = "%s:%s" % (req.get_method(), # XXX selector: what about proxies and full urls req.get_selector()) if qop == 'auth': @@ -1069,46 +1072,43 @@ In particular, parse comma-separated lists where the elements of the list may include quoted-strings. A quoted-string could - contain a comma. + contain a comma. A non-quoted string could have quotes in the + middle. Neither commas nor quotes count if they are escaped. + Only double-quotes count, not single-quotes. """ - # XXX this function could probably use more testing + res = [] + part = '' - list = [] - end = len(s) - i = 0 - inquote = 0 - start = 0 - while i < end: - cur = s[i:] - c = cur.find(',') - q = cur.find('"') - if c == -1: - list.append(s[start:]) - break - if q == -1: - if inquote: - raise ValueError, "unbalanced quotes" - else: - list.append(s[start:i+c]) - i = i + c + 1 + escape = quote = False + for cur in s: + if escape: + part += cur + escape = False + continue + if quote: + if cur == '\\': + escape = True continue - if inquote: - if q < c: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - inquote = 0 - else: - i = i + q - else: - if c < q: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - else: - inquote = 1 - i = i + q + 1 - return map(lambda x: x.strip(), list) + elif cur == '"': + quote = False + part += cur + continue + + if cur == ',': + res.append(part) + part = '' + continue + + if cur == '"': + quote = True + + part += cur + + # append last part + if part: + res.append(part) + + return [part.strip() for part in res] class FileHandler(BaseHandler): # Use local 
file or FTP depending on form of URL @@ -1290,3 +1290,52 @@ if inspect.isclass(ph): ph = ph() opener.add_handler(ph) + +# Mapping status codes to official W3C names +httpresponses = { + 100: 'Continue', + 101: 'Switching Protocols', + + 200: 'OK', + 201: 'Created', + 202: 'Accepted', + 203: 'Non-Authoritative Information', + 204: 'No Content', + 205: 'Reset Content', + 206: 'Partial Content', + + 300: 'Multiple Choices', + 301: 'Moved Permanently', + 302: 'Found', + 303: 'See Other', + 304: 'Not Modified', + 305: 'Use Proxy', + 306: '(Unused)', + 307: 'Temporary Redirect', + + 400: 'Bad Request', + 401: 'Unauthorized', + 402: 'Payment Required', + 403: 'Forbidden', + 404: 'Not Found', + 405: 'Method Not Allowed', + 406: 'Not Acceptable', + 407: 'Proxy Authentication Required', + 408: 'Request Timeout', + 409: 'Conflict', + 410: 'Gone', + 411: 'Length Required', + 412: 'Precondition Failed', + 413: 'Request Entity Too Large', + 414: 'Request-URI Too Long', + 415: 'Unsupported Media Type', + 416: 'Requested Range Not Satisfiable', + 417: 'Expectation Failed', + + 500: 'Internal Server Error', + 501: 'Not Implemented', + 502: 'Bad Gateway', + 503: 'Service Unavailable', + 504: 'Gateway Timeout', + 505: 'HTTP Version Not Supported', +} Index: urlparse.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/urlparse.py,v retrieving revision 1.32.2.2 retrieving revision 1.32.2.3 diff -u -d -r1.32.2.2 -r1.32.2.3 --- urlparse.py 7 Jan 2005 06:58:11 -0000 1.32.2.2 +++ urlparse.py 16 Oct 2005 05:23:59 -0000 1.32.2.3 @@ -13,7 +13,8 @@ 'prospero', 'rtsp', 'rtspu', ''] uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', ''] + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'svn', 'svn+ssh'] non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais', 'imap', 'snews', 'sip'] uses_params = ['ftp', 
'hdl', 'prospero', 'http', 'imap', @@ -63,6 +64,15 @@ i = url.find(';') return url[:i], url[i+1:] +def _splitnetloc(url, start=0): + for c in '/?#': # the order is important! + delim = url.find(c, start) + if delim >= 0: + break + else: + delim = len(url) + return url[start:delim], url[delim:] + def urlsplit(url, scheme='', allow_fragments=1): """Parse a URL into 5 components: :///?# @@ -82,13 +92,7 @@ scheme = url[:i].lower() url = url[i+1:] if url[:2] == '//': - i = url.find('/', 2) - if i < 0: - i = url.find('#') - if i < 0: - i = len(url) - netloc = url[2:i] - url = url[i:] + netloc, url = _splitnetloc(url, 2) if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: @@ -101,12 +105,8 @@ break else: scheme, url = url[:i].lower(), url[i+1:] - if scheme in uses_netloc: - if url[:2] == '//': - i = url.find('/', 2) - if i < 0: - i = len(url) - netloc, url = url[2:i], url[i:] + if scheme in uses_netloc and url[:2] == '//': + netloc, url = _splitnetloc(url, 2) if allow_fragments and scheme in uses_fragment and '#' in url: url, fragment = url.split('#', 1) if scheme in uses_query and '?' 
in url: Index: warnings.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/warnings.py,v retrieving revision 1.16.2.2 retrieving revision 1.16.2.3 diff -u -d -r1.16.2.2 -r1.16.2.3 --- warnings.py 7 Jan 2005 06:58:11 -0000 1.16.2.2 +++ warnings.py 16 Oct 2005 05:23:59 -0000 1.16.2.3 @@ -50,7 +50,11 @@ filename = filename[:-1] else: if module == "__main__": - filename = sys.argv[0] + try: + filename = sys.argv[0] + except AttributeError: + # embedded interpreters don't have sys.argv, see bug #839151 + filename = '__main__' if not filename: filename = module registry = globals.setdefault("__warningregistry__", {}) @@ -216,7 +220,7 @@ if not action: return "default" if action == "all": return "always" # Alias - for a in ['default', 'always', 'ignore', 'module', 'once', 'error']: + for a in ('default', 'always', 'ignore', 'module', 'once', 'error'): if a.startswith(action): return a raise _OptionError("invalid action: %r" % (action,)) Index: weakref.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/weakref.py,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -d -r1.17.2.2 -r1.17.2.3 --- weakref.py 7 Jan 2005 06:58:11 -0000 1.17.2.2 +++ weakref.py 16 Oct 2005 05:23:59 -0000 1.17.2.3 @@ -43,12 +43,12 @@ # way in). 
def __init__(self, *args, **kw): - UserDict.UserDict.__init__(self, *args, **kw) def remove(wr, selfref=ref(self)): self = selfref() if self is not None: del self.data[wr.key] self._remove = remove + UserDict.UserDict.__init__(self, *args, **kw) def __getitem__(self, key): o = self.data[key]() Index: webbrowser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/webbrowser.py,v retrieving revision 1.32.2.2 retrieving revision 1.32.2.3 diff -u -d -r1.32.2.2 -r1.32.2.3 --- webbrowser.py 7 Jan 2005 06:58:11 -0000 1.32.2.2 +++ webbrowser.py 16 Oct 2005 05:23:59 -0000 1.32.2.3 @@ -1,9 +1,11 @@ +#! /usr/bin/env python """Interfaces for launching and remotely controlling Web browsers.""" import os import sys +import stat -__all__ = ["Error", "open", "get", "register"] +__all__ = ["Error", "open", "open_new", "open_new_tab", "get", "register"] class Error(Exception): pass @@ -11,9 +13,13 @@ _browsers = {} # Dictionary of available browser controllers _tryorder = [] # Preference order of available browsers -def register(name, klass, instance=None): +def register(name, klass, instance=None, update_tryorder=1): """Register a browser connector and, optionally, connection.""" _browsers[name.lower()] = [klass, instance] + if update_tryorder > 0: + _tryorder.append(name) + elif update_tryorder < 0: + _tryorder.insert(0, name) def get(using=None): """Return a browser launcher instance appropriate for the environment.""" @@ -26,27 +32,36 @@ # User gave us a command line, don't mess with it. return GenericBrowser(browser) else: - # User gave us a browser name. + # User gave us a browser name or path. 
try: command = _browsers[browser.lower()] except KeyError: command = _synthesize(browser) - if command[1] is None: - return command[0]() - else: + if command[1] is not None: return command[1] + elif command[0] is not None: + return command[0]() raise Error("could not locate runnable browser") # Please note: the following definition hides a builtin function. +# It is recommended one does "import webbrowser" and uses webbrowser.open(url) +# instead of "from webbrowser import *". def open(url, new=0, autoraise=1): - get().open(url, new, autoraise) + for name in _tryorder: + browser = get(name) + if browser.open(url, new, autoraise): + return True + return False def open_new(url): - get().open(url, 1) + return open(url, 1) +def open_new_tab(url): + return open(url, 2) -def _synthesize(browser): + +def _synthesize(browser, update_tryorder=1): """Attempt to synthesize a controller base on existing controllers. This is useful to create a controller when a user specifies a path to @@ -58,9 +73,10 @@ executable for the requested browser, return [None, None]. 
""" - if not os.path.exists(browser): + cmd = browser.split()[0] + if not _iscommand(cmd): return [None, None] - name = os.path.basename(browser) + name = os.path.basename(cmd) try: command = _browsers[name.lower()] except KeyError: @@ -72,27 +88,67 @@ controller = copy.copy(controller) controller.name = browser controller.basename = os.path.basename(browser) - register(browser, None, controller) + register(browser, None, controller, update_tryorder) return [None, controller] return [None, None] +if sys.platform[:3] == "win": + def _isexecutable(cmd): + cmd = cmd.lower() + if os.path.isfile(cmd) and (cmd.endswith(".exe") or + cmd.endswith(".bat")): + return True + for ext in ".exe", ".bat": + if os.path.isfile(cmd + ext): + return True + return False +else: + def _isexecutable(cmd): + if os.path.isfile(cmd): + mode = os.stat(cmd)[stat.ST_MODE] + if mode & stat.S_IXUSR or mode & stat.S_IXGRP or mode & stat.S_IXOTH: + return True + return False + def _iscommand(cmd): - """Return True if cmd can be found on the executable search path.""" + """Return True if cmd is executable or can be found on the executable + search path.""" + if _isexecutable(cmd): + return True path = os.environ.get("PATH") if not path: return False for d in path.split(os.pathsep): exe = os.path.join(d, cmd) - if os.path.isfile(exe): + if _isexecutable(exe): return True return False -PROCESS_CREATION_DELAY = 4 +# General parent classes +class BaseBrowser(object): + """Parent class for all browsers.""" + + def __init__(self, name=""): + self.name = name + self.basename = name + + def open(self, url, new=0, autoraise=1): + raise NotImplementedError + + def open_new(self, url): + return self.open(url, 1) + + def open_new_tab(self, url): + return self.open(url, 2) + + +class GenericBrowser(BaseBrowser): + """Class for all browsers started with a command + and without remote functionality.""" -class GenericBrowser: def __init__(self, cmd): self.name, self.args = cmd.split(None, 1) self.basename = 
os.path.basename(self.name) @@ -100,104 +156,136 @@ def open(self, url, new=0, autoraise=1): assert "'" not in url command = "%s %s" % (self.name, self.args) - os.system(command % url) + rc = os.system(command % url) + return not rc - def open_new(self, url): - self.open(url) +class UnixBrowser(BaseBrowser): + """Parent class for all Unix browsers with remote functionality.""" -class Netscape: - "Launcher class for Netscape browsers." - def __init__(self, name): - self.name = name - self.basename = os.path.basename(name) + raise_opts = None - def _remote(self, action, autoraise): - raise_opt = ("-noraise", "-raise")[autoraise] - cmd = "%s %s -remote '%s' >/dev/null 2>&1" % (self.name, - raise_opt, - action) + remote_cmd = '' + remote_action = None + remote_action_newwin = None + remote_action_newtab = None + remote_background = False + + def _remote(self, url, action, autoraise): + autoraise = int(bool(autoraise)) # always 0/1 + raise_opt = self.raise_opts and self.raise_opts[autoraise] or '' + cmd = "%s %s %s '%s' >/dev/null 2>&1" % (self.name, raise_opt, + self.remote_cmd, action) + if self.remote_background: + cmd += ' &' rc = os.system(cmd) if rc: - import time - os.system("%s &" % self.name) - time.sleep(PROCESS_CREATION_DELAY) - rc = os.system(cmd) + # bad return status, try again with simpler command + rc = os.system("%s %s" % (self.name, url)) return not rc def open(self, url, new=0, autoraise=1): - if new: - self._remote("openURL(%s, new-window)"%url, autoraise) + assert "'" not in url + if new == 0: + action = self.remote_action + elif new == 1: + action = self.remote_action_newwin + elif new == 2: + if self.remote_action_newtab is None: + action = self.remote_action_newwin + else: + action = self.remote_action_newtab else: - self._remote("openURL(%s)" % url, autoraise) + raise Error("Bad 'new' parameter to open(); expected 0, 1, or 2, got %s" % new) + return self._remote(url, action % url, autoraise) - def open_new(self, url): - self.open(url, 1) +class 
Mozilla(UnixBrowser): + """Launcher class for Mozilla/Netscape browsers.""" -class Galeon: - """Launcher class for Galeon browsers.""" - def __init__(self, name): - self.name = name - self.basename = os.path.basename(name) + raise_opts = ("-noraise", "-raise") - def _remote(self, action, autoraise): - raise_opt = ("--noraise", "")[autoraise] - cmd = "%s %s %s >/dev/null 2>&1" % (self.name, raise_opt, action) - rc = os.system(cmd) - if rc: - import time - os.system("%s >/dev/null 2>&1 &" % self.name) - time.sleep(PROCESS_CREATION_DELAY) - rc = os.system(cmd) - return not rc + remote_cmd = '-remote' + remote_action = "openURL(%s)" + remote_action_newwin = "openURL(%s,new-window)" + remote_action_newtab = "openURL(%s,new-tab)" - def open(self, url, new=0, autoraise=1): - if new: - self._remote("-w '%s'" % url, autoraise) - else: - self._remote("-n '%s'" % url, autoraise) +Netscape = Mozilla - def open_new(self, url): - self.open(url, 1) +class Galeon(UnixBrowser): + """Launcher class for Galeon/Epiphany browsers.""" -class Konqueror: + raise_opts = ("-noraise", "") + remote_action = "-n '%s'" + remote_action_newwin = "-w '%s'" + + remote_background = True + + +class Konqueror(BaseBrowser): """Controller for the KDE File Manager (kfm, or Konqueror). See http://developer.kde.org/documentation/other/kfmclient.html for more information on the Konqueror remote-control interface. """ - def __init__(self): - if _iscommand("konqueror"): - self.name = self.basename = "konqueror" - else: - self.name = self.basename = "kfm" - def _remote(self, action): + def _remote(self, url, action): + # kfmclient is the new KDE way of opening URLs. cmd = "kfmclient %s >/dev/null 2>&1" % action rc = os.system(cmd) + # Fall back to other variants. 
if rc: - import time - if self.basename == "konqueror": - os.system(self.name + " --silent &") - else: - os.system(self.name + " -d &") - time.sleep(PROCESS_CREATION_DELAY) - rc = os.system(cmd) + if _iscommand("konqueror"): + rc = os.system(self.name + " --silent '%s' &" % url) + elif _iscommand("kfm"): + rc = os.system(self.name + " -d '%s'" % url) return not rc - def open(self, url, new=1, autoraise=1): + def open(self, url, new=0, autoraise=1): # XXX Currently I know no way to prevent KFM from # opening a new win. assert "'" not in url - self._remote("openURL '%s'" % url) + if new == 2: + action = "newTab '%s'" % url + else: + action = "openURL '%s'" % url + ok = self._remote(url, action) + return ok - open_new = open +class Opera(UnixBrowser): + "Launcher class for Opera browser." -class Grail: + raise_opts = ("", "-raise") + + remote_cmd = '-remote' + remote_action = "openURL(%s)" + remote_action_newwin = "openURL(%s,new-window)" + remote_action_newtab = "openURL(%s,new-page)" + + +class Elinks(UnixBrowser): + "Launcher class for Elinks browsers." + + remote_cmd = '-remote' + remote_action = "openURL(%s)" + remote_action_newwin = "openURL(%s,new-window)" + remote_action_newtab = "openURL(%s,new-tab)" + + def _remote(self, url, action, autoraise): + # elinks doesn't like its stdout to be redirected - + # it uses redirected stdout as a signal to do -dump + cmd = "%s %s '%s' 2>/dev/null" % (self.name, + self.remote_cmd, action) + rc = os.system(cmd) + if rc: + rc = os.system("%s %s" % (self.name, url)) + return not rc + + +class Grail(BaseBrowser): # There should be a way to maintain a connection to Grail, but the # Grail remote control protocol doesn't really allow that at this # point. It probably neverwill! 
@@ -237,93 +325,101 @@ def open(self, url, new=0, autoraise=1): if new: - self._remote("LOADNEW " + url) + ok = self._remote("LOADNEW " + url) else: - self._remote("LOAD " + url) - - def open_new(self, url): - self.open(url, 1) - - -class WindowsDefault: - def open(self, url, new=0, autoraise=1): - os.startfile(url) + ok = self._remote("LOAD " + url) + return ok - def open_new(self, url): - self.open(url) # # Platform support for Unix # -# This is the right test because all these Unix browsers require either -# a console terminal of an X display to run. Note that we cannot split -# the TERM and DISPLAY cases, because we might be running Python from inside -# an xterm. -if os.environ.get("TERM") or os.environ.get("DISPLAY"): - _tryorder = ["links", "lynx", "w3m"] - - # Easy cases first -- register console browsers if we have them. - if os.environ.get("TERM"): - # The Links browser - if _iscommand("links"): - register("links", None, GenericBrowser("links '%s'")) - # The Lynx browser - if _iscommand("lynx"): - register("lynx", None, GenericBrowser("lynx '%s'")) - # The w3m browser - if _iscommand("w3m"): - register("w3m", None, GenericBrowser("w3m '%s'")) +# These are the right tests because all these Unix browsers require either +# a console terminal or an X display to run. 
- # X browsers have more in the way of options - if os.environ.get("DISPLAY"): - _tryorder = ["galeon", "skipstone", - "mozilla-firefox", "mozilla-firebird", "mozilla", "netscape", - "kfm", "grail"] + _tryorder +def register_X_browsers(): + # First, the Mozilla/Netscape browsers + for browser in ("mozilla-firefox", "firefox", + "mozilla-firebird", "firebird", + "mozilla", "netscape"): + if _iscommand(browser): + register(browser, None, Mozilla(browser)) - # First, the Netscape series - for browser in ("mozilla-firefox", "mozilla-firebird", - "mozilla", "netscape"): - if _iscommand(browser): - register(browser, None, Netscape(browser)) + # The default Gnome browser + if _iscommand("gconftool-2"): + # get the web browser string from gconftool + gc = 'gconftool-2 -g /desktop/gnome/url-handlers/http/command' + out = os.popen(gc) + commd = out.read().strip() + retncode = out.close() - # Next, Mosaic -- old but still in use. - if _iscommand("mosaic"): - register("mosaic", None, GenericBrowser( - "mosaic '%s' >/dev/null &")) + # if successful, register it + if retncode == None and len(commd) != 0: + register("gnome", None, GenericBrowser( + commd + " '%s' >/dev/null &")) - # Gnome's Galeon - if _iscommand("galeon"): - register("galeon", None, Galeon("galeon")) + # Konqueror/kfm, the KDE browser. + if _iscommand("kfm"): + register("kfm", Konqueror, Konqueror("kfm")) + elif _iscommand("konqueror"): + register("konqueror", Konqueror, Konqueror("konqueror")) - # Skipstone, another Gtk/Mozilla based browser - if _iscommand("skipstone"): - register("skipstone", None, GenericBrowser( - "skipstone '%s' >/dev/null &")) + # Gnome's Galeon and Epiphany + for browser in ("galeon", "epiphany"): + if _iscommand(browser): + register(browser, None, Galeon(browser)) - # Konqueror/kfm, the KDE browser. 
- if _iscommand("kfm") or _iscommand("konqueror"): - register("kfm", Konqueror, Konqueror()) + # Skipstone, another Gtk/Mozilla based browser + if _iscommand("skipstone"): + register("skipstone", None, GenericBrowser("skipstone '%s' &")) - # Grail, the Python browser. - if _iscommand("grail"): - register("grail", Grail, None) + # Opera, quite popular + if _iscommand("opera"): + register("opera", None, Opera("opera")) + # Next, Mosaic -- old but still in use. + if _iscommand("mosaic"): + register("mosaic", None, GenericBrowser("mosaic '%s' &")) -class InternetConfig: - def open(self, url, new=0, autoraise=1): - ic.launchurl(url) + # Grail, the Python browser. Does anybody still use it? + if _iscommand("grail"): + register("grail", Grail, None) - def open_new(self, url): - self.open(url) +# Prefer X browsers if present +if os.environ.get("DISPLAY"): + register_X_browsers() +# Also try console browsers +if os.environ.get("TERM"): + # The Links/elinks browsers + if _iscommand("links"): + register("links", None, GenericBrowser("links '%s'")) + if _iscommand("elinks"): + register("elinks", None, Elinks("elinks")) + # The Lynx browser , + if _iscommand("lynx"): + register("lynx", None, GenericBrowser("lynx '%s'")) + # The w3m browser + if _iscommand("w3m"): + register("w3m", None, GenericBrowser("w3m '%s'")) # # Platform support for Windows # if sys.platform[:3] == "win": - _tryorder = ["netscape", "windows-default"] + class WindowsDefault(BaseBrowser): + def open(self, url, new=0, autoraise=1): + os.startfile(url) + return True # Oh, my... + + _tryorder = [] + _browsers = {} + # Prefer mozilla/netscape/opera if present + for browser in ("firefox", "firebird", "mozilla", "netscape", "opera"): + if _iscommand(browser): + register(browser, None, GenericBrowser(browser + ' %s')) register("windows-default", WindowsDefault) # @@ -335,36 +431,112 @@ except ImportError: pass else: - # internet-config is the only supported controller on MacOS, - # so don't mess with the default! 
- _tryorder = ["internet-config"] - register("internet-config", InternetConfig) + class InternetConfig(BaseBrowser): + def open(self, url, new=0, autoraise=1): + ic.launchurl(url) + return True # Any way to get status? + + register("internet-config", InternetConfig, update_tryorder=-1) + +if sys.platform == 'darwin': + # Adapted from patch submitted to SourceForge by Steven J. Burr + class MacOSX(BaseBrowser): + """Launcher class for Aqua browsers on Mac OS X + + Optionally specify a browser name on instantiation. Note that this + will not work for Aqua browsers if the user has moved the application + package after installation. + + If no browser is specified, the default browser, as specified in the + Internet System Preferences panel, will be used. + """ + def __init__(self, name): + self.name = name + + def open(self, url, new=0, autoraise=1): + assert "'" not in url + # new must be 0 or 1 + new = int(bool(new)) + if self.name == "default": + # User called open, open_new or get without a browser parameter + script = _safequote('open location "%s"', url) # opens in default browser + else: + # User called get and chose a browser + if self.name == "OmniWeb": + toWindow = "" + else: + # Include toWindow parameter of OpenURL command for browsers + # that support it. 
0 == new window; -1 == existing + toWindow = "toWindow %d" % (new - 1) + cmd = _safequote('OpenURL "%s"', url) + script = '''tell application "%s" + activate + %s %s + end tell''' % (self.name, cmd, toWindow) + # Open pipe to AppleScript through osascript command + osapipe = os.popen("osascript", "w") + if osapipe is None: + return False + # Write script to osascript's stdin + osapipe.write(script) + rc = osapipe.close() + return not rc + + # Don't clear _tryorder or _browsers since OS X can use above Unix support + # (but we prefer using the OS X specific stuff) + register("MacOSX", None, MacOSX('default'), -1) + # # Platform support for OS/2 # -if sys.platform[:3] == "os2" and _iscommand("netscape.exe"): - _tryorder = ["os2netscape"] +if sys.platform[:3] == "os2" and _iscommand("netscape"): + _tryorder = [] + _browsers = {} register("os2netscape", None, - GenericBrowser("start netscape.exe %s")) + GenericBrowser("start netscape %s"), -1) + # OK, now that we know what the default preference orders for each # platform are, allow user to override them with the BROWSER variable. -# if "BROWSER" in os.environ: - # It's the user's responsibility to register handlers for any unknown - # browser referenced by this value, before calling open(). 
- _tryorder = os.environ["BROWSER"].split(os.pathsep) + _userchoices = os.environ["BROWSER"].split(os.pathsep) + _userchoices.reverse() -for cmd in _tryorder: - if not cmd.lower() in _browsers: - if _iscommand(cmd.lower()): - register(cmd.lower(), None, GenericBrowser( - "%s '%%s'" % cmd.lower())) -cmd = None # to make del work if _tryorder was empty -del cmd + # Treat choices in same way as if passed into get() but do register + # and prepend to _tryorder + for cmdline in _userchoices: + if cmdline != '': + _synthesize(cmdline, -1) + cmdline = None # to make del work if _userchoices was empty + del cmdline + del _userchoices -_tryorder = filter(lambda x: x.lower() in _browsers - or x.find("%s") > -1, _tryorder) # what to do if _tryorder is now empty? + + +def main(): + import getopt + usage = """Usage: %s [-n | -t] url + -n: open new window + -t: open new tab""" % sys.argv[0] + try: + opts, args = getopt.getopt(sys.argv[1:], 'ntd') + except getopt.error, msg: + print >>sys.stderr, msg + print >>sys.stderr, usage + sys.exit(1) + new_win = 0 + for o, a in opts: + if o == '-n': new_win = 1 + elif o == '-t': new_win = 2 + if len(args) <> 1: + print >>sys.stderr, usage + sys.exit(1) + + url = args[0] + open(url, new_win) + +if __name__ == "__main__": + main() Index: whichdb.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/whichdb.py,v retrieving revision 1.12.10.2 retrieving revision 1.12.10.3 diff -u -d -r1.12.10.2 -r1.12.10.3 --- whichdb.py 7 Jan 2005 06:58:11 -0000 1.12.10.2 +++ whichdb.py 16 Oct 2005 05:23:59 -0000 1.12.10.3 @@ -62,7 +62,7 @@ return "dumbdbm" f = open(filename + os.extsep + "dir", "rb") try: - if f.read(1) in ["'", '"']: + if f.read(1) in ("'", '"'): return "dumbdbm" finally: f.close() Index: xdrlib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/xdrlib.py,v retrieving revision 1.14.2.2 retrieving revision 
1.14.2.3 diff -u -d -r1.14.2.2 -r1.14.2.3 --- xdrlib.py 7 Jan 2005 06:58:11 -0000 1.14.2.2 +++ xdrlib.py 16 Oct 2005 05:23:59 -0000 1.14.2.3 @@ -79,8 +79,8 @@ def pack_fstring(self, n, s): if n < 0: raise ValueError, 'fstring size must be nonnegative' - n = ((n+3)/4)*4 data = s[:n] + n = ((n+3)/4)*4 data = data + (n - len(data)) * '\0' self.__buf.write(data) @@ -157,7 +157,9 @@ return struct.unpack('>l', data)[0] unpack_enum = unpack_int - unpack_bool = unpack_int + + def unpack_bool(self): + return bool(self.unpack_int()) def unpack_uhyper(self): hi = self.unpack_uint() @@ -232,8 +234,8 @@ p = Packer() packtest = [ (p.pack_uint, (9,)), - (p.pack_bool, (None,)), - (p.pack_bool, ('hello',)), + (p.pack_bool, (True,)), + (p.pack_bool, (False,)), (p.pack_uhyper, (45L,)), (p.pack_float, (1.9,)), (p.pack_double, (1.9,)), @@ -257,8 +259,8 @@ up = Unpacker(data) unpacktest = [ (up.unpack_uint, (), lambda x: x == 9), - (up.unpack_bool, (), lambda x: not x), - (up.unpack_bool, (), lambda x: x), + (up.unpack_bool, (), lambda x: x is True), + (up.unpack_bool, (), lambda x: x is False), (up.unpack_uhyper, (), lambda x: x == 45L), (up.unpack_float, (), lambda x: 1.89 < x < 1.91), (up.unpack_double, (), lambda x: 1.89 < x < 1.91), Index: xmlrpclib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/xmlrpclib.py,v retrieving revision 1.20.2.2 retrieving revision 1.20.2.3 diff -u -d -r1.20.2.2 -r1.20.2.3 --- xmlrpclib.py 7 Jan 2005 06:58:12 -0000 1.20.2.2 +++ xmlrpclib.py 16 Oct 2005 05:23:59 -0000 1.20.2.3 @@ -149,6 +149,11 @@ unicode = None # unicode support not available try: + import datetime +except ImportError: + datetime = None + +try: _bool_is_builtin = False.__class__.__name__ == "bool" except NameError: _bool_is_builtin = 0 @@ -168,7 +173,7 @@ def _stringify(string): # convert to 7-bit ascii if possible try: - return str(string) + return string.encode("ascii") except UnicodeError: return string else: @@ 
-349,6 +354,16 @@ def __init__(self, value=0): if not isinstance(value, StringType): + if datetime and isinstance(value, datetime.datetime): + self.value = value.strftime("%Y%m%dT%H:%M:%S") + return + if datetime and isinstance(value, datetime.date): + self.value = value.strftime("%Y%m%dT%H:%M:%S") + return + if datetime and isinstance(value, datetime.time): + today = datetime.datetime.now().strftime("%Y%m%d") + self.value = value.strftime(today+"T%H:%M:%S") + return if not isinstance(value, (TupleType, time.struct_time)): if value == 0: value = time.time() @@ -386,6 +401,10 @@ value.decode(data) return value +def _datetime_type(data): + t = time.strptime(data, "%Y%m%dT%H:%M:%S") + return datetime.datetime(*tuple(t)[:6]) + ## # Wrapper for binary data. This can be used to transport any kind # of binary data over XML-RPC, using BASE64 encoding. @@ -699,6 +718,26 @@ del self.memo[i] dispatch[DictType] = dump_struct + if datetime: + def dump_datetime(self, value, write): + write("") + write(value.strftime("%Y%m%dT%H:%M:%S")) + write("\n") + dispatch[datetime.datetime] = dump_datetime + + def dump_date(self, value, write): + write("") + write(value.strftime("%Y%m%dT00:00:00")) + write("\n") + dispatch[datetime.date] = dump_date + + def dump_time(self, value, write): + write("") + write(datetime.datetime.now().date().strftime("%Y%m%dT")) + write(value.strftime("%H:%M:%S")) + write("\n") + dispatch[datetime.time] = dump_time + def dump_instance(self, value, write): # check for special wrappers if value.__class__ in WRAPPERS: @@ -727,7 +766,7 @@ # and again, if you don't understand what's going on in here, # that's perfectly ok. 
- def __init__(self): + def __init__(self, use_datetime=0): self._type = None self._stack = [] self._marks = [] @@ -735,6 +774,9 @@ self._methodname = None self._encoding = "utf-8" self.append = self._stack.append + self._use_datetime = use_datetime + if use_datetime and not datetime: + raise ValueError, "the datetime module is not available" def close(self): # return response tuple and target method @@ -852,6 +894,8 @@ def end_dateTime(self, data): value = DateTime() value.decode(data) + if self._use_datetime: + value = _datetime_type(data) self.append(value) dispatch["dateTime.iso8601"] = end_dateTime @@ -953,17 +997,23 @@ # # return A (parser, unmarshaller) tuple. -def getparser(): +def getparser(use_datetime=0): """getparser() -> parser, unmarshaller Create an instance of the fastest available parser, and attach it to an unmarshalling object. Return both objects. """ + if use_datetime and not datetime: + raise ValueError, "the datetime module is not available" if FastParser and FastUnmarshaller: - target = FastUnmarshaller(True, False, _binary, _datetime, Fault) + if use_datetime: + mkdatetime = _datetime_type + else: + mkdatetime = _datetime + target = FastUnmarshaller(True, False, _binary, mkdatetime, Fault) parser = FastParser(target) else: - target = Unmarshaller() + target = Unmarshaller(use_datetime=use_datetime) if FastParser: parser = FastParser(target) elif SgmlopParser: @@ -1066,7 +1116,7 @@ # (None if not present). # @see Fault -def loads(data): +def loads(data, use_datetime=0): """data -> unmarshalled data, method name Convert an XML-RPC packet to unmarshalled data plus a method @@ -1075,7 +1125,7 @@ If the XML-RPC packet represents a fault condition, this function raises a Fault exception. 
""" - p, u = getparser() + p, u = getparser(use_datetime=use_datetime) p.feed(data) p.close() return u.close(), u.getmethodname() @@ -1107,6 +1157,9 @@ # client identifier (may be overridden) user_agent = "xmlrpclib.py/%s (by www.pythonware.com)" % __version__ + def __init__(self, use_datetime=0): + self._use_datetime = use_datetime + ## # Send a complete request, and parse the response. # @@ -1153,7 +1206,7 @@ def getparser(self): # get parser and unmarshaller - return getparser() + return getparser(use_datetime=self._use_datetime) ## # Get authorization info from host parameter @@ -1347,7 +1400,7 @@ """ def __init__(self, uri, transport=None, encoding=None, verbose=0, - allow_none=0): + allow_none=0, use_datetime=0): # establish a "logical" server connection # get the url @@ -1361,9 +1414,9 @@ if transport is None: if type == "https": - transport = SafeTransport() + transport = SafeTransport(use_datetime=use_datetime) else: - transport = Transport() + transport = Transport(use_datetime=use_datetime) self.__transport = transport self.__encoding = encoding Index: zipfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/zipfile.py,v retrieving revision 1.24.2.2 retrieving revision 1.24.2.3 diff -u -d -r1.24.2.2 -r1.24.2.3 --- zipfile.py 7 Jan 2005 06:58:13 -0000 1.24.2.2 +++ zipfile.py 16 Oct 2005 05:23:59 -0000 1.24.2.3 @@ -193,7 +193,7 @@ self.NameToInfo = {} # Find file info given name self.filelist = [] # List of ZipInfo instances for archive self.compression = compression # Method of compression - self.mode = key = mode[0].replace('b', '') + self.mode = key = mode.replace('b', '')[0] # Check if we were passed a file-like object if isinstance(file, basestring): --- profile.doc DELETED --- From jhylton at users.sourceforge.net Sun Oct 16 07:24:41 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:41 +0200 (CEST) Subject: [Python-checkins] 
python/dist/src/Modules _hashopenssl.c, NONE, 2.1.2.2 functionalmodule.c, NONE, 1.2.2.2 sha256module.c, NONE, 2.1.2.2 sha512module.c, NONE, 2.2.2.2 spwdmodule.c, NONE, 1.2.2.2 Setup.dist, 1.28.2.2, 1.28.2.3 _bisectmodule.c, 1.2.6.1, 1.2.6.2 _bsddb.c, 1.13.4.2, 1.13.4.3 _codecsmodule.c, 2.12.2.2, 2.12.2.3 _csv.c, 1.11.4.2, 1.11.4.3 _cursesmodule.c, 2.65.2.2, 2.65.2.3 _localemodule.c, 2.33.2.2, 2.33.2.3 _randommodule.c, 1.5.4.2, 1.5.4.3 _tkinter.c, 1.125.2.2, 1.125.2.3 arraymodule.c, 2.75.2.2, 2.75.2.3 bz2module.c, 1.17.2.2, 1.17.2.3 cStringIO.c, 2.36.2.2, 2.36.2.3 collectionsmodule.c, 1.36.4.1, 1.36.4.2 datetimemodule.c, 1.60.4.2, 1.60.4.3 fcntlmodule.c, 2.35.2.2, 2.35.2.3 gcmodule.c, 2.52.2.2, 2.52.2.3 getpath.c, 1.41.2.2, 1.41.2.3 grpmodule.c, 2.18.2.2, 2.18.2.3 itertoolsmodule.c, 1.10.4.2, 1.10.4.3 ld_so_aix, 2.5, 2.5.32.1 main.c, 1.64.2.3, 1.64.2.4 makexp_aix, 2.2, 2.2.32.1 mathmodule.c, 2.68.2.2, 2.68.2.3 md5module.c, 2.30.2.2, 2.30.2.3 mmapmodule.c, 2.39.2.2, 2.39.2.3 operator.c, 2.21.2.2, 2.21.2.3 ossaudiodev.c, 1.26.4.2, 1.26.4.3 parsermodule.c, 2.70.2.2, 2.70.2.3 posixmodule.c, 2.241.2.2, 2.241.2.3 pwdmodule.c, 1.34.2.2, 1.34.2.3 pyexpat.c, 2.67.2.2, 2.67.2.3 readline.c, 2.51.2.2, 2.51.2.3 shamodule.c, 2.19.2.1, 2.19.2.2 signalmodule.c, 2.70.2.2, 2.70.2.3 socketmodule.c, 1.229.2.2, 1.229.2.3 socketmodule.h, 1.8.2.1, 1.8.2.2 structmodule.c, 2.55.2.2, 2.55.2.3 threadmodule.c, 2.50.2.2, 2.50.2.3 unicodedata.c, 2.18.2.2, 2.18.2.3 zipimport.c, 1.13.4.2, 1.13.4.3 Message-ID: <20051016052441.3FCAD1E400B@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Modules Modified Files: Tag: ast-branch Setup.dist _bisectmodule.c _bsddb.c _codecsmodule.c _csv.c _cursesmodule.c _localemodule.c _randommodule.c _tkinter.c arraymodule.c bz2module.c cStringIO.c collectionsmodule.c datetimemodule.c fcntlmodule.c gcmodule.c getpath.c grpmodule.c itertoolsmodule.c ld_so_aix main.c makexp_aix mathmodule.c 
md5module.c mmapmodule.c operator.c ossaudiodev.c parsermodule.c posixmodule.c pwdmodule.c pyexpat.c readline.c shamodule.c signalmodule.c socketmodule.c socketmodule.h structmodule.c threadmodule.c unicodedata.c zipimport.c Added Files: Tag: ast-branch _hashopenssl.c functionalmodule.c sha256module.c sha512module.c spwdmodule.c Log Message: Merge head to branch (for the last time) --- NEW FILE: _hashopenssl.c --- /* Module that wraps all OpenSSL hash algorithms */ /* * Copyright (C) 2005 Gregory P. Smith (greg at electricrain.com) * Licensed to PSF under a Contributor Agreement. * * Derived from a skeleton of shamodule.c containing work performed by: * * Andrew Kuchling (amk at amk.ca) * Greg Stein (gstein at lyra.org) * */ #include "Python.h" #include "structmember.h" /* EVP is the preferred interface to hashing in OpenSSL */ #include typedef struct { PyObject_HEAD PyObject *name; /* name of this hash algorithm */ EVP_MD_CTX ctx; /* OpenSSL message digest context */ } EVPobject; static PyTypeObject EVPtype; #define DEFINE_CONSTS_FOR_NEW(Name) \ static PyObject *CONST_ ## Name ## _name_obj; \ static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \ static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL; DEFINE_CONSTS_FOR_NEW(md5); DEFINE_CONSTS_FOR_NEW(sha1); DEFINE_CONSTS_FOR_NEW(sha224); DEFINE_CONSTS_FOR_NEW(sha256); DEFINE_CONSTS_FOR_NEW(sha384); DEFINE_CONSTS_FOR_NEW(sha512); static EVPobject * newEVPobject(PyObject *name) { EVPobject *retval = (EVPobject *)PyObject_New(EVPobject, &EVPtype); /* save the name for .name to return */ if (retval != NULL) { Py_INCREF(name); retval->name = name; } return retval; } /* Internal methods for a hash object */ static void EVP_dealloc(PyObject *ptr) { EVP_MD_CTX_cleanup(&((EVPobject *)ptr)->ctx); Py_XDECREF(((EVPobject *)ptr)->name); PyObject_Del(ptr); } /* External methods for a hash object */ PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object."); static PyObject * EVP_copy(EVPobject *self, PyObject *args) { 
EVPobject *newobj; if (!PyArg_ParseTuple(args, ":copy")) return NULL; if ( (newobj = newEVPobject(self->name))==NULL) return NULL; EVP_MD_CTX_copy(&newobj->ctx, &self->ctx); return (PyObject *)newobj; } PyDoc_STRVAR(EVP_digest__doc__, "Return the digest value as a string of binary data."); static PyObject * EVP_digest(EVPobject *self, PyObject *args) { unsigned char digest[EVP_MAX_MD_SIZE]; EVP_MD_CTX temp_ctx; PyObject *retval; unsigned int digest_size; if (!PyArg_ParseTuple(args, ":digest")) return NULL; EVP_MD_CTX_copy(&temp_ctx, &self->ctx); digest_size = EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, (char *)digest, NULL); retval = PyString_FromStringAndSize((const char *)digest, digest_size); EVP_MD_CTX_cleanup(&temp_ctx); return retval; } PyDoc_STRVAR(EVP_hexdigest__doc__, "Return the digest value as a string of hexadecimal digits."); static PyObject * EVP_hexdigest(EVPobject *self, PyObject *args) { unsigned char digest[EVP_MAX_MD_SIZE]; EVP_MD_CTX temp_ctx; PyObject *retval; char *hex_digest; unsigned int i, j, digest_size; if (!PyArg_ParseTuple(args, ":hexdigest")) return NULL; /* Get the raw (binary) digest value */ EVP_MD_CTX_copy(&temp_ctx, &self->ctx); digest_size = EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, digest, NULL); EVP_MD_CTX_cleanup(&temp_ctx); /* Create a new string */ /* NOTE: not thread safe! modifying an already created string object */ /* (not a problem because we hold the GIL by default) */ retval = PyString_FromStringAndSize(NULL, digest_size * 2); if (!retval) return NULL; hex_digest = PyString_AsString(retval); if (!hex_digest) { Py_DECREF(retval); return NULL; } /* Make hex version of the digest */ for(i=j=0; i> 4) & 0xf; c = (c>9) ? c+'a'-10 : c + '0'; hex_digest[j++] = c; c = (digest[i] & 0xf); c = (c>9) ? 
c+'a'-10 : c + '0'; hex_digest[j++] = c; } return retval; } PyDoc_STRVAR(EVP_update__doc__, "Update this hash object's state with the provided string."); static PyObject * EVP_update(EVPobject *self, PyObject *args) { unsigned char *cp; int len; if (!PyArg_ParseTuple(args, "s#:update", &cp, &len)) return NULL; EVP_DigestUpdate(&self->ctx, cp, len); Py_INCREF(Py_None); return Py_None; } static PyMethodDef EVP_methods[] = { {"update", (PyCFunction)EVP_update, METH_VARARGS, EVP_update__doc__}, {"digest", (PyCFunction)EVP_digest, METH_VARARGS, EVP_digest__doc__}, {"hexdigest", (PyCFunction)EVP_hexdigest, METH_VARARGS, EVP_hexdigest__doc__}, {"copy", (PyCFunction)EVP_copy, METH_VARARGS, EVP_copy__doc__}, {NULL, NULL} /* sentinel */ }; static PyObject * EVP_get_block_size(EVPobject *self, void *closure) { return PyInt_FromLong(EVP_MD_CTX_block_size(&((EVPobject *)self)->ctx)); } static PyObject * EVP_get_digest_size(EVPobject *self, void *closure) { return PyInt_FromLong(EVP_MD_CTX_size(&((EVPobject *)self)->ctx)); } static PyMemberDef EVP_members[] = { {"name", T_OBJECT, offsetof(EVPobject, name), READONLY, PyDoc_STR("algorithm name.")}, {NULL} /* Sentinel */ }; static PyGetSetDef EVP_getseters[] = { {"digest_size", (getter)EVP_get_digest_size, NULL, NULL, NULL}, {"block_size", (getter)EVP_get_block_size, NULL, NULL, NULL}, /* the old md5 and sha modules support 'digest_size' as in PEP 247. * the old sha module also supported 'digestsize'. ugh. 
*/ {"digestsize", (getter)EVP_get_digest_size, NULL, NULL, NULL}, {NULL} /* Sentinel */ }; static PyObject * EVP_repr(PyObject *self) { char buf[100]; PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>", PyString_AsString(((EVPobject *)self)->name), self); return PyString_FromString(buf); } #if HASH_OBJ_CONSTRUCTOR static int EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"name", "string", NULL}; PyObject *name_obj = NULL; char *nameStr; unsigned char *cp = NULL; unsigned int len; const EVP_MD *digest; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s#:HASH", kwlist, &name_obj, &cp, &len)) { return -1; } if (!PyArg_Parse(name_obj, "s", &nameStr)) { PyErr_SetString(PyExc_TypeError, "name must be a string"); return -1; } digest = EVP_get_digestbyname(nameStr); if (!digest) { PyErr_SetString(PyExc_ValueError, "unknown hash function"); return -1; } EVP_DigestInit(&self->ctx, digest); self->name = name_obj; Py_INCREF(self->name); if (cp && len) EVP_DigestUpdate(&self->ctx, cp, len); return 0; } #endif PyDoc_STRVAR(hashtype_doc, "A hash represents the object used to calculate a checksum of a\n\ string of information.\n\ \n\ Methods:\n\ \n\ update() -- updates the current digest with an additional string\n\ digest() -- return the current digest value\n\ hexdigest() -- return the current digest as a string of hexadecimal digits\n\ copy() -- return a copy of the current hash object\n\ \n\ Attributes:\n\ \n\ name -- the hash algorithm being used by this object\n\ digest_size -- number of bytes in this hashes output\n"); static PyTypeObject EVPtype = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ "_hashlib.HASH", /*tp_name*/ sizeof(EVPobject), /*tp_basicsize*/ 0, /*tp_itemsize*/ /* methods */ EVP_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ EVP_repr, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, 
/*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ hashtype_doc, /*tp_doc*/ 0, /*tp_traverse*/ 0, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ EVP_methods, /* tp_methods */ EVP_members, /* tp_members */ EVP_getseters, /* tp_getset */ #if 1 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ #endif #if HASH_OBJ_CONSTRUCTOR (initproc)EVP_tp_init, /* tp_init */ #endif }; static PyObject * EVPnew(PyObject *name_obj, const EVP_MD *digest, const EVP_MD_CTX *initial_ctx, const char *cp, unsigned int len) { EVPobject *self; if (!digest && !initial_ctx) { PyErr_SetString(PyExc_ValueError, "unsupported hash type"); return NULL; } if ((self = newEVPobject(name_obj)) == NULL) return NULL; if (initial_ctx) { EVP_MD_CTX_copy(&self->ctx, initial_ctx); } else { EVP_DigestInit(&self->ctx, digest); } if (cp && len) EVP_DigestUpdate(&self->ctx, cp, len); return (PyObject *)self; } /* The module-level function: new() */ PyDoc_STRVAR(EVP_new__doc__, "Return a new hash object using the named algorithm.\n\ An optional string argument may be provided and will be\n\ automatically hashed.\n\ \n\ The MD5 and SHA1 algorithms are always supported.\n"); static PyObject * EVP_new(PyObject *self, PyObject *args, PyObject *kwdict) { static char *kwlist[] = {"name", "string", NULL}; PyObject *name_obj = NULL; char *name; const EVP_MD *digest; unsigned char *cp = NULL; unsigned int len; if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|s#:new", kwlist, &name_obj, &cp, &len)) { return NULL; } if (!PyArg_Parse(name_obj, "s", &name)) { PyErr_SetString(PyExc_TypeError, "name must be a string"); return NULL; } digest = EVP_get_digestbyname(name); return EVPnew(name_obj, digest, NULL, cp, len); } /* * This macro generates constructor function definitions for specific * hash algorithms. 
These constructors are much faster than calling * the generic one passing it a python string and are noticably * faster than calling a python new() wrapper. Thats important for * code that wants to make hashes of a bunch of small strings. */ #define GEN_CONSTRUCTOR(NAME) \ static PyObject * \ EVP_new_ ## NAME (PyObject *self, PyObject *args) \ { \ unsigned char *cp = NULL; \ unsigned int len; \ \ if (!PyArg_ParseTuple(args, "|s#:" #NAME , &cp, &len)) { \ return NULL; \ } \ \ return EVPnew( \ CONST_ ## NAME ## _name_obj, \ NULL, \ CONST_new_ ## NAME ## _ctx_p, \ cp, len); \ } /* a PyMethodDef structure for the constructor */ #define CONSTRUCTOR_METH_DEF(NAME) \ {"openssl_" #NAME, (PyCFunction)EVP_new_ ## NAME, METH_VARARGS, \ PyDoc_STR("Returns a " #NAME \ " hash object; optionally initialized with a string") \ } /* used in the init function to setup a constructor */ #define INIT_CONSTRUCTOR_CONSTANTS(NAME) do { \ CONST_ ## NAME ## _name_obj = PyString_FromString(#NAME); \ if (EVP_get_digestbyname(#NAME)) { \ CONST_new_ ## NAME ## _ctx_p = &CONST_new_ ## NAME ## _ctx; \ EVP_DigestInit(CONST_new_ ## NAME ## _ctx_p, EVP_get_digestbyname(#NAME)); \ } \ } while (0); GEN_CONSTRUCTOR(md5) GEN_CONSTRUCTOR(sha1) GEN_CONSTRUCTOR(sha224) GEN_CONSTRUCTOR(sha256) GEN_CONSTRUCTOR(sha384) GEN_CONSTRUCTOR(sha512) /* List of functions exported by this module */ static struct PyMethodDef EVP_functions[] = { {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__}, CONSTRUCTOR_METH_DEF(md5), CONSTRUCTOR_METH_DEF(sha1), CONSTRUCTOR_METH_DEF(sha224), CONSTRUCTOR_METH_DEF(sha256), CONSTRUCTOR_METH_DEF(sha384), CONSTRUCTOR_METH_DEF(sha512), {NULL, NULL} /* Sentinel */ }; /* Initialize this module. */ PyMODINIT_FUNC init_hashlib(void) { PyObject *m; OpenSSL_add_all_digests(); /* TODO build EVP_functions openssl_* entries dynamically based * on what hashes are supported rather than listing many * but having some be unsupported. Only init appropriate * constants. 
*/ EVPtype.ob_type = &PyType_Type; if (PyType_Ready(&EVPtype) < 0) return; m = Py_InitModule("_hashlib", EVP_functions); if (m == NULL) return; #if HASH_OBJ_CONSTRUCTOR Py_INCREF(&EVPtype); PyModule_AddObject(m, "HASH", (PyObject *)&EVPtype); #endif /* these constants are used by the convenience constructors */ INIT_CONSTRUCTOR_CONSTANTS(md5); INIT_CONSTRUCTOR_CONSTANTS(sha1); INIT_CONSTRUCTOR_CONSTANTS(sha224); INIT_CONSTRUCTOR_CONSTANTS(sha256); INIT_CONSTRUCTOR_CONSTANTS(sha384); INIT_CONSTRUCTOR_CONSTANTS(sha512); } --- NEW FILE: functionalmodule.c --- #include "Python.h" #include "structmember.h" /* Functional module written and maintained by Hye-Shik Chang with adaptations by Raymond Hettinger Copyright (c) 2004, 2005 Python Software Foundation. All rights reserved. */ /* partial object **********************************************************/ typedef struct { PyObject_HEAD PyObject *fn; PyObject *args; PyObject *kw; PyObject *dict; PyObject *weakreflist; /* List of weak references */ } partialobject; static PyTypeObject partial_type; static PyObject * partial_new(PyTypeObject *type, PyObject *args, PyObject *kw) { PyObject *func; partialobject *pto; if (PyTuple_GET_SIZE(args) < 1) { PyErr_SetString(PyExc_TypeError, "type 'partial' takes at least one argument"); return NULL; } func = PyTuple_GET_ITEM(args, 0); if (!PyCallable_Check(func)) { PyErr_SetString(PyExc_TypeError, "the first argument must be callable"); return NULL; } /* create partialobject structure */ pto = (partialobject *)type->tp_alloc(type, 0); if (pto == NULL) return NULL; pto->fn = func; Py_INCREF(func); pto->args = PyTuple_GetSlice(args, 1, INT_MAX); if (pto->args == NULL) { pto->kw = NULL; Py_DECREF(pto); return NULL; } if (kw != NULL) { pto->kw = PyDict_Copy(kw); if (pto->kw == NULL) { Py_DECREF(pto); return NULL; } } else { pto->kw = Py_None; Py_INCREF(Py_None); } pto->weakreflist = NULL; pto->dict = NULL; return (PyObject *)pto; } static void partial_dealloc(partialobject *pto) { 
PyObject_GC_UnTrack(pto); if (pto->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) pto); Py_XDECREF(pto->fn); Py_XDECREF(pto->args); Py_XDECREF(pto->kw); Py_XDECREF(pto->dict); pto->ob_type->tp_free(pto); } static PyObject * partial_call(partialobject *pto, PyObject *args, PyObject *kw) { PyObject *ret; PyObject *argappl = NULL, *kwappl = NULL; assert (PyCallable_Check(pto->fn)); assert (PyTuple_Check(pto->args)); assert (pto->kw == Py_None || PyDict_Check(pto->kw)); if (PyTuple_GET_SIZE(pto->args) == 0) { argappl = args; Py_INCREF(args); } else if (PyTuple_GET_SIZE(args) == 0) { argappl = pto->args; Py_INCREF(pto->args); } else { argappl = PySequence_Concat(pto->args, args); if (argappl == NULL) return NULL; } if (pto->kw == Py_None) { kwappl = kw; Py_XINCREF(kw); } else { kwappl = PyDict_Copy(pto->kw); if (kwappl == NULL) { Py_DECREF(argappl); return NULL; } if (kw != NULL) { if (PyDict_Merge(kwappl, kw, 1) != 0) { Py_DECREF(argappl); Py_DECREF(kwappl); return NULL; } } } ret = PyObject_Call(pto->fn, argappl, kwappl); Py_DECREF(argappl); Py_XDECREF(kwappl); return ret; } static int partial_traverse(partialobject *pto, visitproc visit, void *arg) { Py_VISIT(pto->fn); Py_VISIT(pto->args); Py_VISIT(pto->kw); Py_VISIT(pto->dict); return 0; } PyDoc_STRVAR(partial_doc, "partial(func, *args, **keywords) - new function with partial application\n\ of the given arguments and keywords.\n"); #define OFF(x) offsetof(partialobject, x) static PyMemberDef partial_memberlist[] = { {"func", T_OBJECT, OFF(fn), READONLY, "function object to use in future partial calls"}, {"args", T_OBJECT, OFF(args), READONLY, "tuple of arguments to future partial calls"}, {"keywords", T_OBJECT, OFF(kw), READONLY, "dictionary of keyword arguments to future partial calls"}, {NULL} /* Sentinel */ }; static PyObject * partial_get_dict(partialobject *pto) { if (pto->dict == NULL) { pto->dict = PyDict_New(); if (pto->dict == NULL) return NULL; } Py_INCREF(pto->dict); return pto->dict; } static 
int partial_set_dict(partialobject *pto, PyObject *value) { PyObject *tmp; /* It is illegal to del p.__dict__ */ if (value == NULL) { PyErr_SetString(PyExc_TypeError, "a partial object's dictionary may not be deleted"); return -1; } /* Can only set __dict__ to a dictionary */ if (!PyDict_Check(value)) { PyErr_SetString(PyExc_TypeError, "setting partial object's dictionary to a non-dict"); return -1; } tmp = pto->dict; Py_INCREF(value); pto->dict = value; Py_XDECREF(tmp); return 0; } static PyGetSetDef partail_getsetlist[] = { {"__dict__", (getter)partial_get_dict, (setter)partial_set_dict}, {NULL} /* Sentinel */ }; static PyTypeObject partial_type = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ "functional.partial", /* tp_name */ sizeof(partialobject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor)partial_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ (ternaryfunc)partial_call, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ PyObject_GenericSetAttr, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */ partial_doc, /* tp_doc */ (traverseproc)partial_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ offsetof(partialobject, weakreflist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ 0, /* tp_methods */ partial_memberlist, /* tp_members */ partail_getsetlist, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ offsetof(partialobject, dict), /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ partial_new, /* tp_new */ PyObject_GC_Del, /* tp_free */ }; /* module level code ********************************************************/ PyDoc_STRVAR(module_doc, "Tools for functional programming."); static 
PyMethodDef module_methods[] = { {NULL, NULL} /* sentinel */ }; PyMODINIT_FUNC initfunctional(void) { int i; PyObject *m; char *name; PyTypeObject *typelist[] = { &partial_type, NULL }; m = Py_InitModule3("functional", module_methods, module_doc); for (i=0 ; typelist[i] != NULL ; i++) { if (PyType_Ready(typelist[i]) < 0) return; name = strchr(typelist[i]->tp_name, '.'); assert (name != NULL); Py_INCREF(typelist[i]); PyModule_AddObject(m, name+1, (PyObject *)typelist[i]); } } --- NEW FILE: sha256module.c --- /* SHA256 module */ /* This module provides an interface to NIST's SHA-256 and SHA-224 Algorithms */ /* See below for information about the original code this module was based upon. Additional work performed by: Andrew Kuchling (amk at amk.ca) Greg Stein (gstein at lyra.org) Trevor Perrin (trevp at trevp.net) Copyright (C) 2005 Gregory P. Smith (greg at electricrain.com) Licensed to PSF under a Contributor Agreement. */ /* SHA objects */ #include "Python.h" #include "structmember.h" /* Endianness testing and definitions */ #define TestEndianness(variable) {int i=1; variable=PCT_BIG_ENDIAN;\ if (*((char*)&i)==1) variable=PCT_LITTLE_ENDIAN;} #define PCT_LITTLE_ENDIAN 1 #define PCT_BIG_ENDIAN 0 /* Some useful types */ typedef unsigned char SHA_BYTE; #if SIZEOF_INT == 4 typedef unsigned int SHA_INT32; /* 32-bit integer */ #else /* not defined. compilation will die. */ #endif /* The SHA block size and message digest sizes, in bytes */ #define SHA_BLOCKSIZE 64 #define SHA_DIGESTSIZE 32 /* The structure for storing SHA info */ typedef struct { PyObject_HEAD SHA_INT32 digest[8]; /* Message digest */ SHA_INT32 count_lo, count_hi; /* 64-bit bit count */ SHA_BYTE data[SHA_BLOCKSIZE]; /* SHA data buffer */ int Endianness; int local; /* unprocessed amount in data */ int digestsize; } SHAobject; /* When run on a little-endian CPU we need to perform byte reversal on an array of longwords. 
*/

/* Byte-swap, in place, the 32-bit words held in the first byteCount bytes
 * of buffer.  Nothing is done when Endianness is PCT_BIG_ENDIAN. */
static void
longReverse(SHA_INT32 *buffer, int byteCount, int Endianness)
{
    SHA_INT32 word;

    if (Endianness != PCT_BIG_ENDIAN) {
        /* from here on byteCount counts words, not bytes */
        byteCount /= sizeof(*buffer);
        for (; byteCount--; buffer++) {
            word = *buffer;
            /* swap the bytes inside each 16-bit half ... */
            word = ( ( word & 0xFF00FF00L ) >> 8 ) |
                   ( ( word & 0x00FF00FFL ) << 8 );
            /* ... then swap the two halves */
            *buffer = ( word << 16 ) | ( word >> 16 );
        }
    }
}

/* Copy the complete hashing state of src into dest. */
static void
SHAcopy(SHAobject *src, SHAobject *dest)
{
    memcpy(dest->digest, src->digest, sizeof(src->digest));
    memcpy(dest->data, src->data, sizeof(src->data));
    dest->count_lo = src->count_lo;
    dest->count_hi = src->count_hi;
    dest->local = src->local;
    dest->digestsize = src->digestsize;
    dest->Endianness = src->Endianness;
}


/* ------------------------------------------------------------------------
 *
 * This code for the SHA-256 algorithm was noted as public domain. The
 * original headers are pasted below.
 *
 * Several changes have been made to make it more compatible with the
 * Python environment and desired interface.
 *
 */

/* LibTomCrypt, modular cryptographic library -- Tom St Denis
 *
 * LibTomCrypt is a library that provides various cryptographic
 * algorithms in a highly modular and flexible manner.
 *
 * The library is free for all purposes without any express
 * guarantee it works.
* * Tom St Denis, tomstdenis at iahu.ca, http://libtomcrypt.org */ /* SHA256 by Tom St Denis */ /* Various logical functions */ #define ROR(x, y)\ ( ((((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)((y)&31)) | \ ((unsigned long)(x)<<(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL) #define Ch(x,y,z) (z ^ (x & (y ^ z))) #define Maj(x,y,z) (((x | y) & z) | (x & y)) #define S(x, n) ROR((x),(n)) #define R(x, n) (((x)&0xFFFFFFFFUL)>>(n)) #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) static void sha_transform(SHAobject *sha_info) { int i; SHA_INT32 S[8], W[64], t0, t1; memcpy(W, sha_info->data, sizeof(sha_info->data)); longReverse(W, (int)sizeof(sha_info->data), sha_info->Endianness); for (i = 16; i < 64; ++i) { W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; } for (i = 0; i < 8; ++i) { S[i] = sha_info->digest[i]; } /* Compress */ #define RND(a,b,c,d,e,f,g,h,i,ki) \ t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \ t1 = Sigma0(a) + Maj(a, b, c); \ d += t0; \ h = t0 + t1; RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x71374491); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcf); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba5); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25b); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b01); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe); 
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585); 
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2); #undef RND /* feedback */ for (i = 0; i < 8; i++) { sha_info->digest[i] = sha_info->digest[i] + S[i]; } } /* initialize the SHA digest */ static void sha_init(SHAobject *sha_info) { TestEndianness(sha_info->Endianness) sha_info->digest[0] = 0x6A09E667L; sha_info->digest[1] = 0xBB67AE85L; sha_info->digest[2] = 0x3C6EF372L; sha_info->digest[3] = 0xA54FF53AL; sha_info->digest[4] = 0x510E527FL; sha_info->digest[5] = 0x9B05688CL; sha_info->digest[6] = 0x1F83D9ABL; sha_info->digest[7] = 0x5BE0CD19L; sha_info->count_lo = 0L; sha_info->count_hi = 0L; sha_info->local = 0; sha_info->digestsize = 32; } static void sha224_init(SHAobject *sha_info) { TestEndianness(sha_info->Endianness) sha_info->digest[0] = 0xc1059ed8L; sha_info->digest[1] = 0x367cd507L; sha_info->digest[2] = 0x3070dd17L; sha_info->digest[3] = 0xf70e5939L; sha_info->digest[4] = 0xffc00b31L; sha_info->digest[5] = 0x68581511L; sha_info->digest[6] = 0x64f98fa7L; sha_info->digest[7] = 0xbefa4fa4L; 
sha_info->count_lo = 0L; sha_info->count_hi = 0L; sha_info->local = 0; sha_info->digestsize = 28; } /* update the SHA digest */ static void sha_update(SHAobject *sha_info, SHA_BYTE *buffer, int count) { int i; SHA_INT32 clo; clo = sha_info->count_lo + ((SHA_INT32) count << 3); if (clo < sha_info->count_lo) { ++sha_info->count_hi; } sha_info->count_lo = clo; sha_info->count_hi += (SHA_INT32) count >> 29; if (sha_info->local) { i = SHA_BLOCKSIZE - sha_info->local; if (i > count) { i = count; } memcpy(((SHA_BYTE *) sha_info->data) + sha_info->local, buffer, i); count -= i; buffer += i; sha_info->local += i; if (sha_info->local == SHA_BLOCKSIZE) { sha_transform(sha_info); } else { return; } } while (count >= SHA_BLOCKSIZE) { memcpy(sha_info->data, buffer, SHA_BLOCKSIZE); buffer += SHA_BLOCKSIZE; count -= SHA_BLOCKSIZE; sha_transform(sha_info); } memcpy(sha_info->data, buffer, count); sha_info->local = count; } /* finish computing the SHA digest */ static void sha_final(unsigned char digest[SHA_DIGESTSIZE], SHAobject *sha_info) { int count; SHA_INT32 lo_bit_count, hi_bit_count; lo_bit_count = sha_info->count_lo; hi_bit_count = sha_info->count_hi; count = (int) ((lo_bit_count >> 3) & 0x3f); ((SHA_BYTE *) sha_info->data)[count++] = 0x80; if (count > SHA_BLOCKSIZE - 8) { memset(((SHA_BYTE *) sha_info->data) + count, 0, SHA_BLOCKSIZE - count); sha_transform(sha_info); memset((SHA_BYTE *) sha_info->data, 0, SHA_BLOCKSIZE - 8); } else { memset(((SHA_BYTE *) sha_info->data) + count, 0, SHA_BLOCKSIZE - 8 - count); } /* GJS: note that we add the hi/lo in big-endian. sha_transform will swap these values into host-order. 
*/ sha_info->data[56] = (hi_bit_count >> 24) & 0xff; sha_info->data[57] = (hi_bit_count >> 16) & 0xff; sha_info->data[58] = (hi_bit_count >> 8) & 0xff; sha_info->data[59] = (hi_bit_count >> 0) & 0xff; sha_info->data[60] = (lo_bit_count >> 24) & 0xff; sha_info->data[61] = (lo_bit_count >> 16) & 0xff; sha_info->data[62] = (lo_bit_count >> 8) & 0xff; sha_info->data[63] = (lo_bit_count >> 0) & 0xff; sha_transform(sha_info); digest[ 0] = (unsigned char) ((sha_info->digest[0] >> 24) & 0xff); digest[ 1] = (unsigned char) ((sha_info->digest[0] >> 16) & 0xff); digest[ 2] = (unsigned char) ((sha_info->digest[0] >> 8) & 0xff); digest[ 3] = (unsigned char) ((sha_info->digest[0] ) & 0xff); digest[ 4] = (unsigned char) ((sha_info->digest[1] >> 24) & 0xff); digest[ 5] = (unsigned char) ((sha_info->digest[1] >> 16) & 0xff); digest[ 6] = (unsigned char) ((sha_info->digest[1] >> 8) & 0xff); digest[ 7] = (unsigned char) ((sha_info->digest[1] ) & 0xff); digest[ 8] = (unsigned char) ((sha_info->digest[2] >> 24) & 0xff); digest[ 9] = (unsigned char) ((sha_info->digest[2] >> 16) & 0xff); digest[10] = (unsigned char) ((sha_info->digest[2] >> 8) & 0xff); digest[11] = (unsigned char) ((sha_info->digest[2] ) & 0xff); digest[12] = (unsigned char) ((sha_info->digest[3] >> 24) & 0xff); digest[13] = (unsigned char) ((sha_info->digest[3] >> 16) & 0xff); digest[14] = (unsigned char) ((sha_info->digest[3] >> 8) & 0xff); digest[15] = (unsigned char) ((sha_info->digest[3] ) & 0xff); digest[16] = (unsigned char) ((sha_info->digest[4] >> 24) & 0xff); digest[17] = (unsigned char) ((sha_info->digest[4] >> 16) & 0xff); digest[18] = (unsigned char) ((sha_info->digest[4] >> 8) & 0xff); digest[19] = (unsigned char) ((sha_info->digest[4] ) & 0xff); digest[20] = (unsigned char) ((sha_info->digest[5] >> 24) & 0xff); digest[21] = (unsigned char) ((sha_info->digest[5] >> 16) & 0xff); digest[22] = (unsigned char) ((sha_info->digest[5] >> 8) & 0xff); digest[23] = (unsigned char) ((sha_info->digest[5] ) & 0xff); 
digest[24] = (unsigned char) ((sha_info->digest[6] >> 24) & 0xff); digest[25] = (unsigned char) ((sha_info->digest[6] >> 16) & 0xff); digest[26] = (unsigned char) ((sha_info->digest[6] >> 8) & 0xff); digest[27] = (unsigned char) ((sha_info->digest[6] ) & 0xff); digest[28] = (unsigned char) ((sha_info->digest[7] >> 24) & 0xff); digest[29] = (unsigned char) ((sha_info->digest[7] >> 16) & 0xff); digest[30] = (unsigned char) ((sha_info->digest[7] >> 8) & 0xff); digest[31] = (unsigned char) ((sha_info->digest[7] ) & 0xff); } /* * End of copied SHA code. * * ------------------------------------------------------------------------ */ static PyTypeObject SHA224type; static PyTypeObject SHA256type; static SHAobject * newSHA224object(void) { return (SHAobject *)PyObject_New(SHAobject, &SHA224type); } static SHAobject * newSHA256object(void) { return (SHAobject *)PyObject_New(SHAobject, &SHA256type); } /* Internal methods for a hash object */ static void SHA_dealloc(PyObject *ptr) { PyObject_Del(ptr); } /* External methods for a hash object */ PyDoc_STRVAR(SHA256_copy__doc__, "Return a copy of the hash object."); static PyObject * SHA256_copy(SHAobject *self, PyObject *args) { SHAobject *newobj; if (!PyArg_ParseTuple(args, ":copy")) { return NULL; } if (((PyObject*)self)->ob_type == &SHA256type) { if ( (newobj = newSHA256object())==NULL) return NULL; } else { if ( (newobj = newSHA224object())==NULL) return NULL; } SHAcopy(self, newobj); return (PyObject *)newobj; } PyDoc_STRVAR(SHA256_digest__doc__, "Return the digest value as a string of binary data."); static PyObject * SHA256_digest(SHAobject *self, PyObject *args) { unsigned char digest[SHA_DIGESTSIZE]; SHAobject temp; if (!PyArg_ParseTuple(args, ":digest")) return NULL; SHAcopy(self, &temp); sha_final(digest, &temp); return PyString_FromStringAndSize((const char *)digest, self->digestsize); } PyDoc_STRVAR(SHA256_hexdigest__doc__, "Return the digest value as a string of hexadecimal digits."); static PyObject * 
SHA256_hexdigest(SHAobject *self, PyObject *args) { unsigned char digest[SHA_DIGESTSIZE]; SHAobject temp; PyObject *retval; char *hex_digest; int i, j; if (!PyArg_ParseTuple(args, ":hexdigest")) return NULL; /* Get the raw (binary) digest value */ SHAcopy(self, &temp); sha_final(digest, &temp); /* Create a new string */ retval = PyString_FromStringAndSize(NULL, self->digestsize * 2); if (!retval) return NULL; hex_digest = PyString_AsString(retval); if (!hex_digest) { Py_DECREF(retval); return NULL; } /* Make hex version of the digest */ for(i=j=0; idigestsize; i++) { char c; c = (digest[i] >> 4) & 0xf; c = (c>9) ? c+'a'-10 : c + '0'; hex_digest[j++] = c; c = (digest[i] & 0xf); c = (c>9) ? c+'a'-10 : c + '0'; hex_digest[j++] = c; } return retval; } PyDoc_STRVAR(SHA256_update__doc__, "Update this hash object's state with the provided string."); static PyObject * SHA256_update(SHAobject *self, PyObject *args) { unsigned char *cp; int len; if (!PyArg_ParseTuple(args, "s#:update", &cp, &len)) return NULL; sha_update(self, cp, len); Py_INCREF(Py_None); return Py_None; } static PyMethodDef SHA_methods[] = { {"copy", (PyCFunction)SHA256_copy, METH_VARARGS, SHA256_copy__doc__}, {"digest", (PyCFunction)SHA256_digest, METH_VARARGS, SHA256_digest__doc__}, {"hexdigest", (PyCFunction)SHA256_hexdigest, METH_VARARGS, SHA256_hexdigest__doc__}, {"update", (PyCFunction)SHA256_update, METH_VARARGS, SHA256_update__doc__}, {NULL, NULL} /* sentinel */ }; static PyObject * SHA256_get_block_size(PyObject *self, void *closure) { return PyInt_FromLong(SHA_BLOCKSIZE); } static PyObject * SHA256_get_name(PyObject *self, void *closure) { if (((SHAobject *)self)->digestsize == 32) return PyString_FromStringAndSize("SHA256", 6); else return PyString_FromStringAndSize("SHA224", 6); } static PyGetSetDef SHA_getseters[] = { {"block_size", (getter)SHA256_get_block_size, NULL, NULL, NULL}, {"name", (getter)SHA256_get_name, NULL, NULL, NULL}, {NULL} /* Sentinel */ }; static PyMemberDef SHA_members[] = 
{ {"digest_size", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL}, /* the old md5 and sha modules support 'digest_size' as in PEP 247. * the old sha module also supported 'digestsize'. ugh. */ {"digestsize", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL}, {NULL} /* Sentinel */ }; static PyTypeObject SHA224type = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ "_sha256.sha224", /*tp_name*/ sizeof(SHAobject), /*tp_size*/ 0, /*tp_itemsize*/ /* methods */ SHA_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ 0, /*tp_doc*/ 0, /*tp_traverse*/ 0, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ SHA_methods, /* tp_methods */ SHA_members, /* tp_members */ SHA_getseters, /* tp_getset */ }; static PyTypeObject SHA256type = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ "_sha256.sha256", /*tp_name*/ sizeof(SHAobject), /*tp_size*/ 0, /*tp_itemsize*/ /* methods */ SHA_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ 0, /*tp_doc*/ 0, /*tp_traverse*/ 0, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ SHA_methods, /* tp_methods */ SHA_members, /* tp_members */ SHA_getseters, /* tp_getset */ }; /* The single module-level function: new() */ PyDoc_STRVAR(SHA256_new__doc__, "Return a new SHA-256 hash object; optionally initialized with a string."); static PyObject * SHA256_new(PyObject *self, PyObject *args, PyObject *kwdict) { static char *kwlist[] = {"string", NULL}; SHAobject *new; 
unsigned char *cp = NULL; int len; if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist, &cp, &len)) { return NULL; } if ((new = newSHA256object()) == NULL) return NULL; sha_init(new); if (PyErr_Occurred()) { Py_DECREF(new); return NULL; } if (cp) sha_update(new, cp, len); return (PyObject *)new; } PyDoc_STRVAR(SHA224_new__doc__, "Return a new SHA-224 hash object; optionally initialized with a string."); static PyObject * SHA224_new(PyObject *self, PyObject *args, PyObject *kwdict) { static char *kwlist[] = {"string", NULL}; SHAobject *new; unsigned char *cp = NULL; int len; if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist, &cp, &len)) { return NULL; } if ((new = newSHA224object()) == NULL) return NULL; sha224_init(new); if (PyErr_Occurred()) { Py_DECREF(new); return NULL; } if (cp) sha_update(new, cp, len); return (PyObject *)new; } /* List of functions exported by this module */ static struct PyMethodDef SHA_functions[] = { {"sha256", (PyCFunction)SHA256_new, METH_VARARGS|METH_KEYWORDS, SHA256_new__doc__}, {"sha224", (PyCFunction)SHA224_new, METH_VARARGS|METH_KEYWORDS, SHA224_new__doc__}, {NULL, NULL} /* Sentinel */ }; /* Initialize this module. */ #define insint(n,v) { PyModule_AddIntConstant(m,n,v); } PyMODINIT_FUNC init_sha256(void) { PyObject *m; SHA224type.ob_type = &PyType_Type; if (PyType_Ready(&SHA224type) < 0) return; SHA256type.ob_type = &PyType_Type; if (PyType_Ready(&SHA256type) < 0) return; m = Py_InitModule("_sha256", SHA_functions); } --- NEW FILE: sha512module.c --- /* SHA512 module */ /* This module provides an interface to NIST's SHA-512 and SHA-384 Algorithms */ /* See below for information about the original code this module was based upon. Additional work performed by: Andrew Kuchling (amk at amk.ca) Greg Stein (gstein at lyra.org) Trevor Perrin (trevp at trevp.net) Copyright (C) 2005 Gregory P. Smith (greg at electricrain.com) Licensed to PSF under a Contributor Agreement. 
*/ /* SHA objects */ #include "Python.h" #include "structmember.h" #ifdef PY_LONG_LONG /* If no PY_LONG_LONG, don't compile anything! */ /* Endianness testing and definitions */ #define TestEndianness(variable) {int i=1; variable=PCT_BIG_ENDIAN;\ if (*((char*)&i)==1) variable=PCT_LITTLE_ENDIAN;} #define PCT_LITTLE_ENDIAN 1 #define PCT_BIG_ENDIAN 0 /* Some useful types */ typedef unsigned char SHA_BYTE; #if SIZEOF_INT == 4 typedef unsigned int SHA_INT32; /* 32-bit integer */ typedef unsigned PY_LONG_LONG SHA_INT64; /* 64-bit integer */ #else /* not defined. compilation will die. */ #endif /* The SHA block size and message digest sizes, in bytes */ #define SHA_BLOCKSIZE 128 #define SHA_DIGESTSIZE 64 /* The structure for storing SHA info */ typedef struct { PyObject_HEAD SHA_INT64 digest[8]; /* Message digest */ SHA_INT32 count_lo, count_hi; /* 64-bit bit count */ SHA_BYTE data[SHA_BLOCKSIZE]; /* SHA data buffer */ int Endianness; int local; /* unprocessed amount in data */ int digestsize; } SHAobject; /* When run on a little-endian CPU we need to perform byte reversal on an array of longwords. 
*/

/* Byte-swap, in place, the 64-bit words held in the first byteCount bytes
 * of buffer.  Nothing is done when Endianness is PCT_BIG_ENDIAN. */
static void
longReverse(SHA_INT64 *buffer, int byteCount, int Endianness)
{
    SHA_INT64 word;
    unsigned char *bytes;
    int k;

    if (Endianness != PCT_BIG_ENDIAN) {
        /* from here on byteCount counts words, not bytes */
        byteCount /= sizeof(*buffer);
        for (; byteCount--; buffer++) {
            word = *buffer;
            bytes = (unsigned char*)buffer;
            /* store the most significant byte at the lowest address */
            for (k = 0; k < 8; k++) {
                bytes[k] = (unsigned char)(word >> (56 - 8 * k)) & 0xff;
            }
        }
    }
}

/* Copy the complete hashing state of src into dest. */
static void
SHAcopy(SHAobject *src, SHAobject *dest)
{
    memcpy(dest->digest, src->digest, sizeof(src->digest));
    memcpy(dest->data, src->data, sizeof(src->data));
    dest->count_lo = src->count_lo;
    dest->count_hi = src->count_hi;
    dest->local = src->local;
    dest->digestsize = src->digestsize;
    dest->Endianness = src->Endianness;
}


/* ------------------------------------------------------------------------
 *
 * This code for the SHA-512 algorithm was noted as public domain. The
 * original headers are pasted below.
 *
 * Several changes have been made to make it more compatible with the
 * Python environment and desired interface.
 *
 */

/* LibTomCrypt, modular cryptographic library -- Tom St Denis
 *
 * LibTomCrypt is a library that provides various cryptographic
 * algorithms in a highly modular and flexible manner.
 *
 * The library is free for all purposes without any express
 * guarantee it works.
* * Tom St Denis, tomstdenis at iahu.ca, http://libtomcrypt.org */ /* SHA512 by Tom St Denis */ /* Various logical functions */ #define ROR64(x, y) \ ( ((((x) & 0xFFFFFFFFFFFFFFFF)>>((unsigned PY_LONG_LONG)(y) & 63)) | \ ((x)<<((unsigned PY_LONG_LONG)(64-((y) & 63))))) & 0xFFFFFFFFFFFFFFFF) #define Ch(x,y,z) (z ^ (x & (y ^ z))) #define Maj(x,y,z) (((x | y) & z) | (x & y)) #define S(x, n) ROR64((x),(n)) #define R(x, n) (((x) & 0xFFFFFFFFFFFFFFFF) >> ((unsigned PY_LONG_LONG)n)) #define Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39)) #define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41)) #define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7)) #define Gamma1(x) (S(x, 19) ^ S(x, 61) ^ R(x, 6)) static void sha512_transform(SHAobject *sha_info) { int i; SHA_INT64 S[8], W[80], t0, t1; memcpy(W, sha_info->data, sizeof(sha_info->data)); longReverse(W, (int)sizeof(sha_info->data), sha_info->Endianness); for (i = 16; i < 80; ++i) { W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; } for (i = 0; i < 8; ++i) { S[i] = sha_info->digest[i]; } /* Compress */ #define RND(a,b,c,d,e,f,g,h,i,ki) \ t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \ t1 = Sigma0(a) + Maj(a, b, c); \ d += t0; \ h = t0 + t1; RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98d728ae22); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x7137449123ef65cd); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcfec4d3b2f); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba58189dbbc); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25bf348b538); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1b605d019); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4af194f9b); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5da6d8118); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98a3030242); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b0145706fbe); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be4ee4b28c); 
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3d5ffb4e2); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74f27b896f); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe3b1696b1); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a725c71235); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174cf692694); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c19ef14ad2); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786384f25e3); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc68b8cd5b5); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc77ac9c65); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f592b0275); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa6ea6e483); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dcbd41fbd4); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da831153b5); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152ee66dfab); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d2db43210); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c898fb213f); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7beef0ee4); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf33da88fc2); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147930aa725); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351e003826f); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x142929670a0e6e70); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a8546d22ffc); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b21385c26c926); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc5ac42aed); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d139d95b3df); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a73548baf63de); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb3c77b2a8); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e47edaee6); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c851482353b); 
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a14cf10364); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664bbc423001); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70d0f89791); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a30654be30); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819d6ef5218); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd69906245565a910); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e35855771202a); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa07032bbd1b8); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116b8d2d0c8); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c085141ab53); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774cdf8eeb99); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5e19b48a8); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3c5c95a63); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4ae3418acb); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f7763e373); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3d6b2b8a3); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee5defb2fc); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f43172f60); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814a1f0ab72); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc702081a6439ec); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa23631e28); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506cebde82bde9); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7b2c67915); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2e372532b); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],64,0xca273eceea26619c); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],65,0xd186b8c721c0c207); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],66,0xeada7dd6cde0eb1e); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],67,0xf57d4f7fee6ed178); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],68,0x06f067aa72176fba); 
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],69,0x0a637dc5a2c898a6); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],70,0x113f9804bef90dae); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],71,0x1b710b35131c471b); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],72,0x28db77f523047d84); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],73,0x32caab7b40c72493); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],74,0x3c9ebe0a15c9bebc); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],75,0x431d67c49c100d4c); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],76,0x4cc5d4becb3e42b6); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],77,0x597f299cfc657e2a); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],78,0x5fcb6fab3ad6faec); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],79,0x6c44198c4a475817); #undef RND /* feedback */ for (i = 0; i < 8; i++) { sha_info->digest[i] = sha_info->digest[i] + S[i]; } } /* initialize the SHA digest */ static void sha512_init(SHAobject *sha_info) { TestEndianness(sha_info->Endianness) sha_info->digest[0] = 0x6a09e667f3bcc908; sha_info->digest[1] = 0xbb67ae8584caa73b; sha_info->digest[2] = 0x3c6ef372fe94f82b; sha_info->digest[3] = 0xa54ff53a5f1d36f1; sha_info->digest[4] = 0x510e527fade682d1; sha_info->digest[5] = 0x9b05688c2b3e6c1f; sha_info->digest[6] = 0x1f83d9abfb41bd6b; sha_info->digest[7] = 0x5be0cd19137e2179; sha_info->count_lo = 0L; sha_info->count_hi = 0L; sha_info->local = 0; sha_info->digestsize = 64; } static void sha384_init(SHAobject *sha_info) { TestEndianness(sha_info->Endianness) sha_info->digest[0] = 0xcbbb9d5dc1059ed8; sha_info->digest[1] = 0x629a292a367cd507; sha_info->digest[2] = 0x9159015a3070dd17; sha_info->digest[3] = 0x152fecd8f70e5939; sha_info->digest[4] = 0x67332667ffc00b31; sha_info->digest[5] = 0x8eb44a8768581511; sha_info->digest[6] = 0xdb0c2e0d64f98fa7; sha_info->digest[7] = 0x47b5481dbefa4fa4; sha_info->count_lo = 0L; sha_info->count_hi = 0L; sha_info->local = 0; sha_info->digestsize = 48; } /* update the SHA digest */ static void sha512_update(SHAobject 
*sha_info, SHA_BYTE *buffer, int count) { int i; SHA_INT32 clo; clo = sha_info->count_lo + ((SHA_INT32) count << 3); if (clo < sha_info->count_lo) { ++sha_info->count_hi; } sha_info->count_lo = clo; sha_info->count_hi += (SHA_INT32) count >> 29; if (sha_info->local) { i = SHA_BLOCKSIZE - sha_info->local; if (i > count) { i = count; } memcpy(((SHA_BYTE *) sha_info->data) + sha_info->local, buffer, i); count -= i; buffer += i; sha_info->local += i; if (sha_info->local == SHA_BLOCKSIZE) { sha512_transform(sha_info); } else { return; } } while (count >= SHA_BLOCKSIZE) { memcpy(sha_info->data, buffer, SHA_BLOCKSIZE); buffer += SHA_BLOCKSIZE; count -= SHA_BLOCKSIZE; sha512_transform(sha_info); } memcpy(sha_info->data, buffer, count); sha_info->local = count; } /* finish computing the SHA digest */ static void sha512_final(unsigned char digest[SHA_DIGESTSIZE], SHAobject *sha_info) { int count; SHA_INT32 lo_bit_count, hi_bit_count; lo_bit_count = sha_info->count_lo; hi_bit_count = sha_info->count_hi; count = (int) ((lo_bit_count >> 3) & 0x7f); ((SHA_BYTE *) sha_info->data)[count++] = 0x80; if (count > SHA_BLOCKSIZE - 16) { memset(((SHA_BYTE *) sha_info->data) + count, 0, SHA_BLOCKSIZE - count); sha512_transform(sha_info); memset((SHA_BYTE *) sha_info->data, 0, SHA_BLOCKSIZE - 16); } else { memset(((SHA_BYTE *) sha_info->data) + count, 0, SHA_BLOCKSIZE - 16 - count); } /* GJS: note that we add the hi/lo in big-endian. sha512_transform will swap these values into host-order. 
*/ sha_info->data[112] = 0; sha_info->data[113] = 0; sha_info->data[114] = 0; sha_info->data[115] = 0; sha_info->data[116] = 0; sha_info->data[117] = 0; sha_info->data[118] = 0; sha_info->data[119] = 0; sha_info->data[120] = (hi_bit_count >> 24) & 0xff; sha_info->data[121] = (hi_bit_count >> 16) & 0xff; sha_info->data[122] = (hi_bit_count >> 8) & 0xff; sha_info->data[123] = (hi_bit_count >> 0) & 0xff; sha_info->data[124] = (lo_bit_count >> 24) & 0xff; sha_info->data[125] = (lo_bit_count >> 16) & 0xff; sha_info->data[126] = (lo_bit_count >> 8) & 0xff; sha_info->data[127] = (lo_bit_count >> 0) & 0xff; sha512_transform(sha_info); digest[ 0] = (unsigned char) ((sha_info->digest[0] >> 56) & 0xff); digest[ 1] = (unsigned char) ((sha_info->digest[0] >> 48) & 0xff); digest[ 2] = (unsigned char) ((sha_info->digest[0] >> 40) & 0xff); digest[ 3] = (unsigned char) ((sha_info->digest[0] >> 32) & 0xff); digest[ 4] = (unsigned char) ((sha_info->digest[0] >> 24) & 0xff); digest[ 5] = (unsigned char) ((sha_info->digest[0] >> 16) & 0xff); digest[ 6] = (unsigned char) ((sha_info->digest[0] >> 8) & 0xff); digest[ 7] = (unsigned char) ((sha_info->digest[0] ) & 0xff); digest[ 8] = (unsigned char) ((sha_info->digest[1] >> 56) & 0xff); digest[ 9] = (unsigned char) ((sha_info->digest[1] >> 48) & 0xff); digest[10] = (unsigned char) ((sha_info->digest[1] >> 40) & 0xff); digest[11] = (unsigned char) ((sha_info->digest[1] >> 32) & 0xff); digest[12] = (unsigned char) ((sha_info->digest[1] >> 24) & 0xff); digest[13] = (unsigned char) ((sha_info->digest[1] >> 16) & 0xff); digest[14] = (unsigned char) ((sha_info->digest[1] >> 8) & 0xff); digest[15] = (unsigned char) ((sha_info->digest[1] ) & 0xff); digest[16] = (unsigned char) ((sha_info->digest[2] >> 56) & 0xff); digest[17] = (unsigned char) ((sha_info->digest[2] >> 48) & 0xff); digest[18] = (unsigned char) ((sha_info->digest[2] >> 40) & 0xff); digest[19] = (unsigned char) ((sha_info->digest[2] >> 32) & 0xff); digest[20] = (unsigned char) 
((sha_info->digest[2] >> 24) & 0xff); digest[21] = (unsigned char) ((sha_info->digest[2] >> 16) & 0xff); digest[22] = (unsigned char) ((sha_info->digest[2] >> 8) & 0xff); digest[23] = (unsigned char) ((sha_info->digest[2] ) & 0xff); digest[24] = (unsigned char) ((sha_info->digest[3] >> 56) & 0xff); digest[25] = (unsigned char) ((sha_info->digest[3] >> 48) & 0xff); digest[26] = (unsigned char) ((sha_info->digest[3] >> 40) & 0xff); digest[27] = (unsigned char) ((sha_info->digest[3] >> 32) & 0xff); digest[28] = (unsigned char) ((sha_info->digest[3] >> 24) & 0xff); digest[29] = (unsigned char) ((sha_info->digest[3] >> 16) & 0xff); digest[30] = (unsigned char) ((sha_info->digest[3] >> 8) & 0xff); digest[31] = (unsigned char) ((sha_info->digest[3] ) & 0xff); digest[32] = (unsigned char) ((sha_info->digest[4] >> 56) & 0xff); digest[33] = (unsigned char) ((sha_info->digest[4] >> 48) & 0xff); digest[34] = (unsigned char) ((sha_info->digest[4] >> 40) & 0xff); digest[35] = (unsigned char) ((sha_info->digest[4] >> 32) & 0xff); digest[36] = (unsigned char) ((sha_info->digest[4] >> 24) & 0xff); digest[37] = (unsigned char) ((sha_info->digest[4] >> 16) & 0xff); digest[38] = (unsigned char) ((sha_info->digest[4] >> 8) & 0xff); digest[39] = (unsigned char) ((sha_info->digest[4] ) & 0xff); digest[40] = (unsigned char) ((sha_info->digest[5] >> 56) & 0xff); digest[41] = (unsigned char) ((sha_info->digest[5] >> 48) & 0xff); digest[42] = (unsigned char) ((sha_info->digest[5] >> 40) & 0xff); digest[43] = (unsigned char) ((sha_info->digest[5] >> 32) & 0xff); digest[44] = (unsigned char) ((sha_info->digest[5] >> 24) & 0xff); digest[45] = (unsigned char) ((sha_info->digest[5] >> 16) & 0xff); digest[46] = (unsigned char) ((sha_info->digest[5] >> 8) & 0xff); digest[47] = (unsigned char) ((sha_info->digest[5] ) & 0xff); digest[48] = (unsigned char) ((sha_info->digest[6] >> 56) & 0xff); digest[49] = (unsigned char) ((sha_info->digest[6] >> 48) & 0xff); digest[50] = (unsigned char) 
((sha_info->digest[6] >> 40) & 0xff); digest[51] = (unsigned char) ((sha_info->digest[6] >> 32) & 0xff); digest[52] = (unsigned char) ((sha_info->digest[6] >> 24) & 0xff); digest[53] = (unsigned char) ((sha_info->digest[6] >> 16) & 0xff); digest[54] = (unsigned char) ((sha_info->digest[6] >> 8) & 0xff); digest[55] = (unsigned char) ((sha_info->digest[6] ) & 0xff); digest[56] = (unsigned char) ((sha_info->digest[7] >> 56) & 0xff); digest[57] = (unsigned char) ((sha_info->digest[7] >> 48) & 0xff); digest[58] = (unsigned char) ((sha_info->digest[7] >> 40) & 0xff); digest[59] = (unsigned char) ((sha_info->digest[7] >> 32) & 0xff); digest[60] = (unsigned char) ((sha_info->digest[7] >> 24) & 0xff); digest[61] = (unsigned char) ((sha_info->digest[7] >> 16) & 0xff); digest[62] = (unsigned char) ((sha_info->digest[7] >> 8) & 0xff); digest[63] = (unsigned char) ((sha_info->digest[7] ) & 0xff); } /* * End of copied SHA code. * * ------------------------------------------------------------------------ */ static PyTypeObject SHA384type; static PyTypeObject SHA512type; static SHAobject * newSHA384object(void) { return (SHAobject *)PyObject_New(SHAobject, &SHA384type); } static SHAobject * newSHA512object(void) { return (SHAobject *)PyObject_New(SHAobject, &SHA512type); } /* Internal methods for a hash object */ static void SHA512_dealloc(PyObject *ptr) { PyObject_Del(ptr); } /* External methods for a hash object */ PyDoc_STRVAR(SHA512_copy__doc__, "Return a copy of the hash object."); static PyObject * SHA512_copy(SHAobject *self, PyObject *args) { SHAobject *newobj; if (!PyArg_ParseTuple(args, ":copy")) { return NULL; } if (((PyObject*)self)->ob_type == &SHA512type) { if ( (newobj = newSHA512object())==NULL) return NULL; } else { if ( (newobj = newSHA384object())==NULL) return NULL; } SHAcopy(self, newobj); return (PyObject *)newobj; } PyDoc_STRVAR(SHA512_digest__doc__, "Return the digest value as a string of binary data."); static PyObject * SHA512_digest(SHAobject *self, 
PyObject *args) { unsigned char digest[SHA_DIGESTSIZE]; SHAobject temp; if (!PyArg_ParseTuple(args, ":digest")) return NULL; SHAcopy(self, &temp); sha512_final(digest, &temp); return PyString_FromStringAndSize((const char *)digest, self->digestsize); } PyDoc_STRVAR(SHA512_hexdigest__doc__, "Return the digest value as a string of hexadecimal digits."); static PyObject * SHA512_hexdigest(SHAobject *self, PyObject *args) { unsigned char digest[SHA_DIGESTSIZE]; SHAobject temp; PyObject *retval; char *hex_digest; int i, j; if (!PyArg_ParseTuple(args, ":hexdigest")) return NULL; /* Get the raw (binary) digest value */ SHAcopy(self, &temp); sha512_final(digest, &temp); /* Create a new string */ retval = PyString_FromStringAndSize(NULL, self->digestsize * 2); if (!retval) return NULL; hex_digest = PyString_AsString(retval); if (!hex_digest) { Py_DECREF(retval); return NULL; } /* Make hex version of the digest */ for(i=j=0; i<self->digestsize; i++) { char c; c = (digest[i] >> 4) & 0xf; c = (c>9) ? c+'a'-10 : c + '0'; hex_digest[j++] = c; c = (digest[i] & 0xf); c = (c>9) ? 
c+'a'-10 : c + '0'; hex_digest[j++] = c; } return retval; } PyDoc_STRVAR(SHA512_update__doc__, "Update this hash object's state with the provided string."); static PyObject * SHA512_update(SHAobject *self, PyObject *args) { unsigned char *cp; int len; if (!PyArg_ParseTuple(args, "s#:update", &cp, &len)) return NULL; sha512_update(self, cp, len); Py_INCREF(Py_None); return Py_None; } static PyMethodDef SHA_methods[] = { {"copy", (PyCFunction)SHA512_copy, METH_VARARGS, SHA512_copy__doc__}, {"digest", (PyCFunction)SHA512_digest, METH_VARARGS, SHA512_digest__doc__}, {"hexdigest", (PyCFunction)SHA512_hexdigest, METH_VARARGS, SHA512_hexdigest__doc__}, {"update", (PyCFunction)SHA512_update, METH_VARARGS, SHA512_update__doc__}, {NULL, NULL} /* sentinel */ }; static PyObject * SHA512_get_block_size(PyObject *self, void *closure) { return PyInt_FromLong(SHA_BLOCKSIZE); } static PyObject * SHA512_get_name(PyObject *self, void *closure) { if (((SHAobject *)self)->digestsize == 64) return PyString_FromStringAndSize("SHA512", 6); else return PyString_FromStringAndSize("SHA384", 6); } static PyGetSetDef SHA_getseters[] = { {"block_size", (getter)SHA512_get_block_size, NULL, NULL, NULL}, {"name", (getter)SHA512_get_name, NULL, NULL, NULL}, {NULL} /* Sentinel */ }; static PyMemberDef SHA_members[] = { {"digest_size", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL}, /* the old md5 and sha modules support 'digest_size' as in PEP 247. * the old sha module also supported 'digestsize'. ugh. 
*/ {"digestsize", T_INT, offsetof(SHAobject, digestsize), READONLY, NULL}, {NULL} /* Sentinel */ }; static PyTypeObject SHA384type = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ "_sha512.sha384", /*tp_name*/ sizeof(SHAobject), /*tp_size*/ 0, /*tp_itemsize*/ /* methods */ SHA512_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ 0, /*tp_doc*/ 0, /*tp_traverse*/ 0, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ SHA_methods, /* tp_methods */ SHA_members, /* tp_members */ SHA_getseters, /* tp_getset */ }; static PyTypeObject SHA512type = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ "_sha512.sha512", /*tp_name*/ sizeof(SHAobject), /*tp_size*/ 0, /*tp_itemsize*/ /* methods */ SHA512_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ 0, /*tp_doc*/ 0, /*tp_traverse*/ 0, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ SHA_methods, /* tp_methods */ SHA_members, /* tp_members */ SHA_getseters, /* tp_getset */ }; /* The single module-level function: new() */ PyDoc_STRVAR(SHA512_new__doc__, "Return a new SHA-512 hash object; optionally initialized with a string."); static PyObject * SHA512_new(PyObject *self, PyObject *args, PyObject *kwdict) { static char *kwlist[] = {"string", NULL}; SHAobject *new; unsigned char *cp = NULL; int len; if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist, &cp, &len)) { return NULL; } if ((new = newSHA512object()) == NULL) return NULL; 
sha512_init(new); if (PyErr_Occurred()) { Py_DECREF(new); return NULL; } if (cp) sha512_update(new, cp, len); return (PyObject *)new; } PyDoc_STRVAR(SHA384_new__doc__, "Return a new SHA-384 hash object; optionally initialized with a string."); static PyObject * SHA384_new(PyObject *self, PyObject *args, PyObject *kwdict) { static char *kwlist[] = {"string", NULL}; SHAobject *new; unsigned char *cp = NULL; int len; if (!PyArg_ParseTupleAndKeywords(args, kwdict, "|s#:new", kwlist, &cp, &len)) { return NULL; } if ((new = newSHA384object()) == NULL) return NULL; sha384_init(new); if (PyErr_Occurred()) { Py_DECREF(new); return NULL; } if (cp) sha512_update(new, cp, len); return (PyObject *)new; } /* List of functions exported by this module */ static struct PyMethodDef SHA_functions[] = { {"sha512", (PyCFunction)SHA512_new, METH_VARARGS|METH_KEYWORDS, SHA512_new__doc__}, {"sha384", (PyCFunction)SHA384_new, METH_VARARGS|METH_KEYWORDS, SHA384_new__doc__}, {NULL, NULL} /* Sentinel */ }; /* Initialize this module. 
*/ #define insint(n,v) { PyModule_AddIntConstant(m,n,v); } PyMODINIT_FUNC init_sha512(void) { PyObject *m; SHA384type.ob_type = &PyType_Type; if (PyType_Ready(&SHA384type) < 0) return; SHA512type.ob_type = &PyType_Type; if (PyType_Ready(&SHA512type) < 0) return; m = Py_InitModule("_sha512", SHA_functions); } #endif --- NEW FILE: spwdmodule.c --- /* UNIX shadow password file access module */ /* A lot of code has been taken from pwdmodule.c */ /* For info also see http://www.unixpapa.com/incnote/passwd.html */ #include "Python.h" #include "structseq.h" #include <sys/types.h> #ifdef HAVE_SHADOW_H #include <shadow.h> #endif PyDoc_STRVAR(spwd__doc__, "This module provides access to the Unix shadow password database.\n\ It is available on various Unix versions.\n\ \n\ Shadow password database entries are reported as 9-tuples of type struct_spwd,\n\ containing the following items from the password database (see `<shadow.h>'):\n\ sp_namp, sp_pwdp, sp_lstchg, sp_min, sp_max, sp_warn, sp_inact, sp_expire, sp_flag.\n\ The sp_namp and sp_pwdp are strings, the rest are integers.\n\ An exception is raised if the entry asked for cannot be found.\n\ You have to be root to be able to use this module."); #if defined(HAVE_GETSPNAM) || defined(HAVE_GETSPENT) static PyStructSequence_Field struct_spwd_type_fields[] = { {"sp_nam", "login name"}, {"sp_pwd", "encrypted password"}, {"sp_lstchg", "date of last change"}, {"sp_min", "min #days between changes"}, {"sp_max", "max #days between changes"}, {"sp_warn", "#days before pw expires to warn user about it"}, {"sp_inact", "#days after pw expires until account is blocked"}, {"sp_expire", "#days since 1970-01-01 until account is disabled"}, {"sp_flag", "reserved"}, {0} }; PyDoc_STRVAR(struct_spwd__doc__, "spwd.struct_spwd: Results from getsp*() routines.\n\n\ This object may be accessed either as a 9-tuple of\n\ (sp_nam,sp_pwd,sp_lstchg,sp_min,sp_max,sp_warn,sp_inact,sp_expire,sp_flag)\n\ or via the object attributes as named in the above tuple."); static 
PyStructSequence_Desc struct_spwd_type_desc = { "spwd.struct_spwd", struct_spwd__doc__, struct_spwd_type_fields, 9, }; static PyTypeObject StructSpwdType; static void sets(PyObject *v, int i, char* val) { if (val) PyStructSequence_SET_ITEM(v, i, PyString_FromString(val)); else { PyStructSequence_SET_ITEM(v, i, Py_None); Py_INCREF(Py_None); } } static PyObject *mkspent(struct spwd *p) { int setIndex = 0; PyObject *v = PyStructSequence_New(&StructSpwdType); if (v == NULL) return NULL; #define SETI(i,val) PyStructSequence_SET_ITEM(v, i, PyInt_FromLong((long) val)) #define SETS(i,val) sets(v, i, val) SETS(setIndex++, p->sp_namp); SETS(setIndex++, p->sp_pwdp); SETI(setIndex++, p->sp_lstchg); SETI(setIndex++, p->sp_min); SETI(setIndex++, p->sp_max); SETI(setIndex++, p->sp_warn); SETI(setIndex++, p->sp_inact); SETI(setIndex++, p->sp_expire); SETI(setIndex++, p->sp_flag); #undef SETS #undef SETI if (PyErr_Occurred()) { Py_XDECREF(v); return NULL; } return v; } #endif /* HAVE_GETSPNAM || HAVE_GETSPENT */ #ifdef HAVE_GETSPNAM PyDoc_STRVAR(spwd_getspnam__doc__, "getspnam(name) -> (sp_namp, sp_pwdp, sp_lstchg, sp_min, sp_max,\n\ sp_warn, sp_inact, sp_expire, sp_flag)\n\ Return the shadow password database entry for the given user name.\n\ See spwd.__doc__ for more on shadow password database entries."); static PyObject* spwd_getspnam(PyObject *self, PyObject *args) { char *name; struct spwd *p; if (!PyArg_ParseTuple(args, "s:getspnam", &name)) return NULL; if ((p = getspnam(name)) == NULL) { PyErr_SetString(PyExc_KeyError, "getspnam(): name not found"); return NULL; } return mkspent(p); } #endif /* HAVE_GETSPNAM */ #ifdef HAVE_GETSPENT PyDoc_STRVAR(spwd_getspall__doc__, "getspall() -> list_of_entries\n\ Return a list of all available shadow password database entries, \ in arbitrary order.\n\ See spwd.__doc__ for more on shadow password database entries."); static PyObject * spwd_getspall(PyObject *self, PyObject *args) { PyObject *d; struct spwd *p; if ((d = PyList_New(0)) == 
NULL) return NULL; setspent(); while ((p = getspent()) != NULL) { PyObject *v = mkspent(p); if (v == NULL || PyList_Append(d, v) != 0) { Py_XDECREF(v); Py_DECREF(d); endspent(); return NULL; } Py_DECREF(v); } endspent(); return d; } #endif /* HAVE_GETSPENT */ static PyMethodDef spwd_methods[] = { #ifdef HAVE_GETSPNAM {"getspnam", spwd_getspnam, METH_VARARGS, spwd_getspnam__doc__}, #endif #ifdef HAVE_GETSPENT {"getspall", spwd_getspall, METH_NOARGS, spwd_getspall__doc__}, #endif {NULL, NULL} /* sentinel */ }; PyMODINIT_FUNC initspwd(void) { PyObject *m; m=Py_InitModule3("spwd", spwd_methods, spwd__doc__); PyStructSequence_InitType(&StructSpwdType, &struct_spwd_type_desc); Py_INCREF((PyObject *) &StructSpwdType); PyModule_AddObject(m, "struct_spwd", (PyObject *) &StructSpwdType); } Index: Setup.dist =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/Setup.dist,v retrieving revision 1.28.2.2 retrieving revision 1.28.2.3 diff -u -d -r1.28.2.2 -r1.28.2.3 --- Setup.dist 7 Jan 2005 07:02:51 -0000 1.28.2.2 +++ Setup.dist 16 Oct 2005 05:24:04 -0000 1.28.2.3 @@ -170,6 +170,10 @@ #operator operator.c # operator.add() and similar goodies #_weakref _weakref.c # basic weak reference support #_testcapi _testcapimodule.c # Python C API test module +#_random _randommodule.c # Random number generator +#collections collectionsmodule.c # Container types +#itertools itertoolsmodule.c # Functions creating iterators for efficient looping +#strop stropmodule.c # String manipulations #unicodedata unicodedata.c # static Unicode character database @@ -183,6 +187,7 @@ #fcntl fcntlmodule.c # fcntl(2) and ioctl(2) #pwd pwdmodule.c # pwd(3) +#spwd spwdmodule.c # spwd(3) #grp grpmodule.c # grp(3) #select selectmodule.c # select(2); not on ancient System V Index: _bisectmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_bisectmodule.c,v retrieving revision 
1.2.6.1 retrieving revision 1.2.6.2 diff -u -d -r1.2.6.1 -r1.2.6.2 --- _bisectmodule.c 7 Jan 2005 07:02:51 -0000 1.2.6.1 +++ _bisectmodule.c 16 Oct 2005 05:24:04 -0000 1.2.6.2 @@ -34,15 +34,16 @@ } static PyObject * -bisect_right(PyObject *self, PyObject *args) +bisect_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:bisect_right", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:bisect_right", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_right(list, item, lo, hi); if (index < 0) @@ -51,7 +52,7 @@ } PyDoc_STRVAR(bisect_right_doc, -"bisect_right(list, item[, lo[, hi]]) -> index\n\ +"bisect_right(a, x[, lo[, hi]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -63,15 +64,16 @@ slice of a to be searched.\n"); static PyObject * -insort_right(PyObject *self, PyObject *args) +insort_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:insort_right", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:insort_right", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_right(list, item, lo, hi); if (index < 0) @@ -91,7 +93,7 @@ } PyDoc_STRVAR(insort_right_doc, -"insort_right(list, item[, lo[, hi]])\n\ +"insort_right(a, x[, lo[, hi]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ @@ -129,15 +131,16 @@ } static PyObject * -bisect_left(PyObject *self, PyObject *args) +bisect_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if 
(!PyArg_ParseTuple(args, "OO|ii:bisect_left", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:bisect_left", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_left(list, item, lo, hi); if (index < 0) @@ -146,7 +149,7 @@ } PyDoc_STRVAR(bisect_left_doc, -"bisect_left(list, item[, lo[, hi]]) -> index\n\ +"bisect_left(a, x[, lo[, hi]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -158,15 +161,16 @@ slice of a to be searched.\n"); static PyObject * -insort_left(PyObject *self, PyObject *args) +insort_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; int lo = 0; int hi = -1; int index; + static char *keywords[] = {"a", "x", "lo", "hi", NULL}; - if (!PyArg_ParseTuple(args, "OO|ii:insort_left", - &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|ii:insort_left", + keywords, &list, &item, &lo, &hi)) return NULL; index = internal_bisect_left(list, item, lo, hi); if (index < 0) @@ -186,7 +190,7 @@ } PyDoc_STRVAR(insort_left_doc, -"insort_left(list, item[, lo[, hi]])\n\ +"insort_left(a, x[, lo[, hi]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ @@ -200,17 +204,17 @@ static PyMethodDef bisect_methods[] = { {"bisect_right", (PyCFunction)bisect_right, - METH_VARARGS, bisect_right_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_right_doc}, {"bisect", (PyCFunction)bisect_right, - METH_VARARGS, bisect_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_doc}, {"insort_right", (PyCFunction)insort_right, - METH_VARARGS, insort_right_doc}, + METH_VARARGS|METH_KEYWORDS, insort_right_doc}, {"insort", (PyCFunction)insort_right, - METH_VARARGS, insort_doc}, + METH_VARARGS|METH_KEYWORDS, insort_doc}, {"bisect_left", (PyCFunction)bisect_left, - METH_VARARGS, bisect_left_doc}, + METH_VARARGS|METH_KEYWORDS, bisect_left_doc}, {"insort_left", (PyCFunction)insort_left, - METH_VARARGS, insort_left_doc}, + 
METH_VARARGS|METH_KEYWORDS, insort_left_doc}, {NULL, NULL} /* sentinel */ }; Index: _bsddb.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_bsddb.c,v retrieving revision 1.13.4.2 retrieving revision 1.13.4.3 diff -u -d -r1.13.4.2 -r1.13.4.3 --- _bsddb.c 7 Jan 2005 07:02:51 -0000 1.13.4.2 +++ _bsddb.c 16 Oct 2005 05:24:04 -0000 1.13.4.3 @@ -97,7 +97,7 @@ #error "eek! DBVER can't handle minor versions > 9" #endif -#define PY_BSDDB_VERSION "4.3.0" +#define PY_BSDDB_VERSION "4.3.3" static char *rcs_id = "$Id$"; @@ -153,7 +153,7 @@ static PyObject* DBError; /* Base class, all others derive from this */ static PyObject* DBCursorClosedError; /* raised when trying to use a closed cursor object */ -static PyObject* DBKeyEmptyError; /* DB_KEYEMPTY */ +static PyObject* DBKeyEmptyError; /* DB_KEYEMPTY: also derives from KeyError */ static PyObject* DBKeyExistError; /* DB_KEYEXIST */ static PyObject* DBLockDeadlockError; /* DB_LOCK_DEADLOCK */ static PyObject* DBLockNotGrantedError; /* DB_LOCK_NOTGRANTED */ @@ -212,10 +212,10 @@ struct behaviourFlags { /* What is the default behaviour when DB->get or DBCursor->get returns a - DB_NOTFOUND error? Return None or raise an exception? */ + DB_NOTFOUND || DB_KEYEMPTY error? Return None or raise an exception? */ unsigned int getReturnsNone : 1; /* What is the default behaviour for DBCursor.set* methods when DBCursor->get - * returns a DB_NOTFOUND error? Return None or raise an exception? */ + * returns a DB_NOTFOUND || DB_KEYEMPTY error? Return None or raise? 
*/ unsigned int cursorSetReturnsNone : 1; }; @@ -244,6 +244,7 @@ struct behaviourFlags moduleFlags; #if (DBVER >= 33) PyObject* associateCallback; + PyObject* btCompareCallback; int primaryDBType; #endif #ifdef HAVE_WEAKREF @@ -672,7 +673,8 @@ err = self->dbc->c_get(self->dbc, &key, &data, flags); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->mydb->moduleFlags.getReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->mydb->moduleFlags.getReturnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -741,6 +743,7 @@ self->myenvobj = NULL; #if (DBVER >= 33) self->associateCallback = NULL; + self->btCompareCallback = NULL; self->primaryDBType = 0; #endif #ifdef HAVE_WEAKREF @@ -815,6 +818,10 @@ Py_DECREF(self->associateCallback); self->associateCallback = NULL; } + if (self->btCompareCallback != NULL) { + Py_DECREF(self->btCompareCallback); + self->btCompareCallback = NULL; + } #endif PyObject_Del(self); } @@ -1165,6 +1172,7 @@ makeTypeError("DB", (PyObject*)secondaryDB); return NULL; } + CHECK_DB_NOT_CLOSED(secondaryDB); if (callback == Py_None) { callback = NULL; } @@ -1174,9 +1182,7 @@ } /* Save a reference to the callback in the secondary DB. 
*/ - if (self->associateCallback != NULL) { - Py_DECREF(self->associateCallback); - } + Py_XDECREF(secondaryDB->associateCallback); Py_INCREF(callback); secondaryDB->associateCallback = callback; secondaryDB->primaryDBType = _DB_get_type(self); @@ -1210,8 +1216,8 @@ MYDB_END_ALLOW_THREADS; if (err) { - Py_DECREF(self->associateCallback); - self->associateCallback = NULL; + Py_XDECREF(secondaryDB->associateCallback); + secondaryDB->associateCallback = NULL; secondaryDB->primaryDBType = 0; } @@ -1279,7 +1285,8 @@ err = self->db->get(self->db, txn, &key, &data, flags|consume_flag); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->moduleFlags.getReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->moduleFlags.getReturnsNone) { err = 0; Py_INCREF(Py_None); retval = Py_None; @@ -1424,12 +1431,13 @@ err = self->db->get(self->db, txn, &key, &data, flags); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && (dfltobj != NULL)) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) && (dfltobj != NULL)) { err = 0; Py_INCREF(dfltobj); retval = dfltobj; } - else if ((err == DB_NOTFOUND) && self->moduleFlags.getReturnsNone) { + else if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->moduleFlags.getReturnsNone) { err = 0; Py_INCREF(Py_None); retval = Py_None; @@ -1493,12 +1501,13 @@ err = self->db->pget(self->db, txn, &key, &pkey, &data, flags); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && (dfltobj != NULL)) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) && (dfltobj != NULL)) { err = 0; Py_INCREF(dfltobj); retval = dfltobj; } - else if ((err == DB_NOTFOUND) && self->moduleFlags.getReturnsNone) { + else if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->moduleFlags.getReturnsNone) { err = 0; Py_INCREF(Py_None); retval = Py_None; @@ -1623,7 +1632,8 @@ err = self->db->get(self->db, txn, &key, &data, flags); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->moduleFlags.getReturnsNone) { + if ((err == DB_NOTFOUND || 
err == DB_KEYEMPTY) + && self->moduleFlags.getReturnsNone) { err = 0; Py_INCREF(Py_None); retval = Py_None; @@ -1959,6 +1969,158 @@ RETURN_NONE(); } +static int +_default_cmp (const DBT *leftKey, + const DBT *rightKey) +{ + int res; + int lsize = leftKey->size, rsize = rightKey->size; + + res = memcmp (leftKey->data, rightKey->data, + lsize < rsize ? lsize : rsize); + + if (res == 0) { + if (lsize < rsize) { + res = -1; + } + else if (lsize > rsize) { + res = 1; + } + } + return res; +} + +static int +_db_compareCallback (DB* db, + const DBT *leftKey, + const DBT *rightKey) +{ + int res = 0; + PyObject *args; + PyObject *result; + PyObject *leftObject; + PyObject *rightObject; + DBObject *self = (DBObject *) db->app_private; + + if (self == NULL || self->btCompareCallback == NULL) { + MYDB_BEGIN_BLOCK_THREADS; + PyErr_SetString (PyExc_TypeError, + (self == 0 + ? "DB_bt_compare db is NULL." + : "DB_bt_compare callback is NULL.")); + /* we're in a callback within the DB code, we can't raise */ + PyErr_Print (); + res = _default_cmp (leftKey, rightKey); + MYDB_END_BLOCK_THREADS; + } + else { + MYDB_BEGIN_BLOCK_THREADS; + + leftObject = PyString_FromStringAndSize (leftKey->data, leftKey->size); + rightObject = PyString_FromStringAndSize (rightKey->data, rightKey->size); + + args = PyTuple_New (2); + Py_INCREF (self); + PyTuple_SET_ITEM (args, 0, leftObject); /* steals reference */ + PyTuple_SET_ITEM (args, 1, rightObject); /* steals reference */ + + result = PyEval_CallObject (self->btCompareCallback, args); + if (result == 0) { + /* we're in a callback within the DB code, we can't raise */ + PyErr_Print (); + res = _default_cmp (leftKey, rightKey); + } + else if (PyInt_Check (result)) { + res = PyInt_AsLong (result); + } + else { + PyErr_SetString (PyExc_TypeError, + "DB_bt_compare callback MUST return an int."); + /* we're in a callback within the DB code, we can't raise */ + PyErr_Print (); + res = _default_cmp (leftKey, rightKey); + } + + Py_DECREF (args); + 
Py_XDECREF (result); + + MYDB_END_BLOCK_THREADS; + } + return res; +} + +static PyObject* +DB_set_bt_compare (DBObject* self, PyObject* args) +{ + int err; + PyObject *comparator; + PyObject *tuple, *emptyStr, *result; + + if (!PyArg_ParseTuple(args,"O:set_bt_compare", &comparator )) + return NULL; + + CHECK_DB_NOT_CLOSED (self); + + if (! PyCallable_Check (comparator)) { + makeTypeError ("Callable", comparator); + return NULL; + } + + /* + * Perform a test call of the comparator function with two empty + * string objects here. verify that it returns an int (0). + * err if not. + */ + tuple = PyTuple_New (2); + + emptyStr = PyString_FromStringAndSize (NULL, 0); + Py_INCREF(emptyStr); + PyTuple_SET_ITEM (tuple, 0, emptyStr); + PyTuple_SET_ITEM (tuple, 1, emptyStr); /* steals reference */ + result = PyEval_CallObject (comparator, tuple); + Py_DECREF (tuple); + if (result == 0 || !PyInt_Check(result)) { + PyErr_SetString (PyExc_TypeError, + "callback MUST return an int"); + return NULL; + } + else if (PyInt_AsLong(result) != 0) { + PyErr_SetString (PyExc_TypeError, + "callback failed to return 0 on two empty strings"); + return NULL; + } + + /* We don't accept multiple set_bt_compare operations, in order to + * simplify the code. This would have no real use, as one cannot + * change the function once the db is opened anyway */ + if (self->btCompareCallback != NULL) { + PyErr_SetString (PyExc_RuntimeError, "set_bt_compare () cannot be called more than once"); + return NULL; + } + + Py_INCREF (comparator); + self->btCompareCallback = comparator; + + /* This is to workaround a problem with un-initialized threads (see + comment in DB_associate) */ +#ifdef WITH_THREAD + PyEval_InitThreads(); +#endif + + err = self->db->set_bt_compare (self->db, + (comparator != NULL ? 
+ _db_compareCallback : NULL)); + + if (err) { + /* restore the old state in case of error */ + Py_DECREF (comparator); + self->btCompareCallback = NULL; + } + + RETURN_IF_ERR (); + RETURN_NONE (); +} + static PyObject* DB_set_cachesize(DBObject* self, PyObject* args) @@ -2584,7 +2746,15 @@ err = self->db->get(self->db, txn, &key, &data, 0); MYDB_END_ALLOW_THREADS; FREE_DBT(key); - return PyInt_FromLong((err == DB_BUFFER_SMALL) || (err == 0)); + + if (err == DB_BUFFER_SMALL || err == 0) { + return PyInt_FromLong(1); + } else if (err == DB_NOTFOUND || err == DB_KEYEMPTY) { + return PyInt_FromLong(0); + } + + makeDBError(err); + return NULL; } @@ -2682,8 +2852,8 @@ Py_DECREF(item); } - /* DB_NOTFOUND is okay, it just means we got to the end */ - if (err != DB_NOTFOUND && makeDBError(err)) { + /* DB_NOTFOUND || DB_KEYEMPTY is okay, it means we got to the end */ + if (err != DB_NOTFOUND && err != DB_KEYEMPTY && makeDBError(err)) { Py_DECREF(list); list = NULL; } @@ -2890,7 +3060,8 @@ err = self->dbc->c_get(self->dbc, &key, &data, flags); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->mydb->moduleFlags.getReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->mydb->moduleFlags.getReturnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -2977,7 +3148,8 @@ err = self->dbc->c_pget(self->dbc, &key, &pkey, &data, flags); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->mydb->moduleFlags.getReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->mydb->moduleFlags.getReturnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -3144,7 +3316,8 @@ MYDB_BEGIN_ALLOW_THREADS; err = self->dbc->c_get(self->dbc, &key, &data, flags|DB_SET); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->mydb->moduleFlags.cursorSetReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->mydb->moduleFlags.cursorSetReturnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -3216,7 +3389,8 @@ 
MYDB_BEGIN_ALLOW_THREADS; err = self->dbc->c_get(self->dbc, &key, &data, flags|DB_SET_RANGE); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->mydb->moduleFlags.cursorSetReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->mydb->moduleFlags.cursorSetReturnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -3271,7 +3445,7 @@ MYDB_BEGIN_ALLOW_THREADS; err = self->dbc->c_get(self->dbc, &key, &data, flags|DB_GET_BOTH); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && returnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) && returnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -3411,7 +3585,8 @@ MYDB_BEGIN_ALLOW_THREADS; err = self->dbc->c_get(self->dbc, &key, &data, flags|DB_SET_RECNO); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->mydb->moduleFlags.cursorSetReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->mydb->moduleFlags.cursorSetReturnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -3479,7 +3654,8 @@ MYDB_BEGIN_ALLOW_THREADS; err = self->dbc->c_get(self->dbc, &key, &data, flags | DB_JOIN_ITEM); MYDB_END_ALLOW_THREADS; - if ((err == DB_NOTFOUND) && self->mydb->moduleFlags.getReturnsNone) { + if ((err == DB_NOTFOUND || err == DB_KEYEMPTY) + && self->mydb->moduleFlags.getReturnsNone) { Py_INCREF(Py_None); retval = Py_None; } @@ -3781,6 +3957,23 @@ static PyObject* +DBEnv_set_lg_regionmax(DBEnvObject* self, PyObject* args) +{ + int err, lg_max; + + if (!PyArg_ParseTuple(args, "i:set_lg_regionmax", &lg_max)) + return NULL; + CHECK_ENV_NOT_CLOSED(self); + + MYDB_BEGIN_ALLOW_THREADS; + err = self->db_env->set_lg_regionmax(self->db_env, lg_max); + MYDB_END_ALLOW_THREADS; + RETURN_IF_ERR(); + RETURN_NONE(); +} + + +static PyObject* DBEnv_set_lk_detect(DBEnvObject* self, PyObject* args) { int err, lk_detect; @@ -4400,6 +4593,7 @@ {"remove", (PyCFunction)DB_remove, METH_VARARGS|METH_KEYWORDS}, {"rename", (PyCFunction)DB_rename, METH_VARARGS}, {"set_bt_minkey", 
(PyCFunction)DB_set_bt_minkey, METH_VARARGS}, + {"set_bt_compare", (PyCFunction)DB_set_bt_compare, METH_VARARGS}, {"set_cachesize", (PyCFunction)DB_set_cachesize, METH_VARARGS}, #if (DBVER >= 41) {"set_encrypt", (PyCFunction)DB_set_encrypt, METH_VARARGS|METH_KEYWORDS}, @@ -4489,6 +4683,7 @@ {"set_lg_bsize", (PyCFunction)DBEnv_set_lg_bsize, METH_VARARGS}, {"set_lg_dir", (PyCFunction)DBEnv_set_lg_dir, METH_VARARGS}, {"set_lg_max", (PyCFunction)DBEnv_set_lg_max, METH_VARARGS}, + {"set_lg_regionmax",(PyCFunction)DBEnv_set_lg_regionmax, METH_VARARGS}, {"set_lk_detect", (PyCFunction)DBEnv_set_lk_detect, METH_VARARGS}, {"set_lk_max", (PyCFunction)DBEnv_set_lk_max, METH_VARARGS}, #if (DBVER >= 32) @@ -5117,12 +5312,15 @@ DBError = PyErr_NewException("bsddb._db.DBError", NULL, NULL); PyDict_SetItemString(d, "DBError", DBError); - /* Some magic to make DBNotFoundError derive from both DBError and - KeyError, since the API only supports using one base class. */ + /* Some magic to make DBNotFoundError and DBKeyEmptyError derive + * from both DBError and KeyError, since the API only supports + * using one base class. 
*/ PyDict_SetItemString(d, "KeyError", PyExc_KeyError); - PyRun_String("class DBNotFoundError(DBError, KeyError): pass", + PyRun_String("class DBNotFoundError(DBError, KeyError): pass\n" + "class DBKeyEmptyError(DBError, KeyError): pass", Py_file_input, d, d); DBNotFoundError = PyDict_GetItemString(d, "DBNotFoundError"); + DBKeyEmptyError = PyDict_GetItemString(d, "DBKeyEmptyError"); PyDict_DelItemString(d, "KeyError"); Index: _codecsmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_codecsmodule.c,v retrieving revision 2.12.2.2 retrieving revision 2.12.2.3 diff -u -d -r2.12.2.2 -r2.12.2.3 --- _codecsmodule.c 7 Jan 2005 07:02:51 -0000 2.12.2.2 +++ _codecsmodule.c 16 Oct 2005 05:24:04 -0000 2.12.2.3 @@ -104,8 +104,15 @@ if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors)) return NULL; +#ifdef Py_USING_UNICODE if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); +#else + if (encoding == NULL) { + PyErr_SetString(PyExc_ValueError, "no encoding specified"); + return NULL; + } +#endif /* Encode via the codec registry */ v = PyCodec_Encode(v, encoding, errors); @@ -137,8 +144,15 @@ if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors)) return NULL; +#ifdef Py_USING_UNICODE if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); +#else + if (encoding == NULL) { + PyErr_SetString(PyExc_ValueError, "no encoding specified"); + return NULL; + } +#endif /* Decode via the codec registry */ v = PyCodec_Decode(v, encoding, errors); @@ -240,8 +254,8 @@ else { if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) return NULL; - return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data, - size / sizeof(Py_UNICODE)), + + return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors), size); } } Index: _csv.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_csv.c,v 
retrieving revision 1.11.4.2 retrieving revision 1.11.4.3 diff -u -d -r1.11.4.2 -r1.11.4.3 --- _csv.c 7 Jan 2005 07:02:52 -0000 1.11.4.2 +++ _csv.c 16 Oct 2005 05:24:04 -0000 1.11.4.3 @@ -39,12 +39,17 @@ #endif /* end 2.2 compatibility macros */ +#define IS_BASESTRING(o) \ + PyObject_TypeCheck(o, &PyBaseString_Type) + static PyObject *error_obj; /* CSV exception */ static PyObject *dialects; /* Dialect registry */ +static long field_limit = 128 * 1024; /* max parsed field size */ typedef enum { [...1406 lines suppressed...] + { "register_dialect", (PyCFunction)csv_register_dialect, + METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, + { "unregister_dialect", (PyCFunction)csv_unregister_dialect, + METH_O, csv_unregister_dialect_doc}, + { "get_dialect", (PyCFunction)csv_get_dialect, + METH_O, csv_get_dialect_doc}, + { "field_size_limit", (PyCFunction)csv_field_size_limit, + METH_VARARGS, csv_field_size_limit_doc}, + { NULL, NULL } }; PyMODINIT_FUNC @@ -1545,6 +1587,7 @@ } /* Add the Dialect type */ + Py_INCREF(&Dialect_Type); if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) return; Index: _cursesmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_cursesmodule.c,v retrieving revision 2.65.2.2 retrieving revision 2.65.2.3 diff -u -d -r2.65.2.2 -r2.65.2.3 --- _cursesmodule.c 7 Jan 2005 07:02:52 -0000 2.65.2.2 +++ _cursesmodule.c 16 Oct 2005 05:24:04 -0000 2.65.2.3 @@ -162,6 +162,10 @@ "must call start_color() first"); \ return 0; } +#ifndef MIN +#define MIN(x,y) ((x) < (y) ? 
(x) : (y)) +#endif + /* Utility Functions */ /* @@ -801,21 +805,21 @@ switch (PyTuple_Size(args)) { case 0: Py_BEGIN_ALLOW_THREADS - rtn2 = wgetstr(self->win,rtn); + rtn2 = wgetnstr(self->win,rtn, 1023); Py_END_ALLOW_THREADS break; case 1: if (!PyArg_ParseTuple(args,"i;n", &n)) return NULL; Py_BEGIN_ALLOW_THREADS - rtn2 = wgetnstr(self->win,rtn,n); + rtn2 = wgetnstr(self->win,rtn,MIN(n, 1023)); Py_END_ALLOW_THREADS break; case 2: if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) return NULL; Py_BEGIN_ALLOW_THREADS - rtn2 = mvwgetstr(self->win,y,x,rtn); + rtn2 = mvwgetnstr(self->win,y,x,rtn, 1023); Py_END_ALLOW_THREADS break; case 3: @@ -825,11 +829,11 @@ /* Untested */ Py_BEGIN_ALLOW_THREADS rtn2 = wmove(self->win,y,x)==ERR ? ERR : - wgetnstr(self->win, rtn, n); + wgetnstr(self->win, rtn, MIN(n, 1023)); Py_END_ALLOW_THREADS #else Py_BEGIN_ALLOW_THREADS - rtn2 = mvwgetnstr(self->win, y, x, rtn, n); + rtn2 = mvwgetnstr(self->win, y, x, rtn, MIN(n, 1023)); Py_END_ALLOW_THREADS #endif break; @@ -962,22 +966,22 @@ switch (PyTuple_Size(args)) { case 0: - rtn2 = winstr(self->win,rtn); + rtn2 = winnstr(self->win,rtn, 1023); break; case 1: if (!PyArg_ParseTuple(args,"i;n", &n)) return NULL; - rtn2 = winnstr(self->win,rtn,n); + rtn2 = winnstr(self->win,rtn,MIN(n,1023)); break; case 2: if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) return NULL; - rtn2 = mvwinstr(self->win,y,x,rtn); + rtn2 = mvwinnstr(self->win,y,x,rtn,1023); break; case 3: if (!PyArg_ParseTuple(args, "iii;y,x,n", &y, &x, &n)) return NULL; - rtn2 = mvwinnstr(self->win, y, x, rtn, n); + rtn2 = mvwinnstr(self->win, y, x, rtn, MIN(n,1023)); break; default: PyErr_SetString(PyExc_TypeError, "instr requires 0 or 3 arguments"); @@ -2103,7 +2107,7 @@ return NULL; } - if (!pair_content(pair, &f, &b)) { + if (pair_content(pair, &f, &b)==ERR) { PyErr_SetString(PyCursesError, "Argument 1 was out of range. 
(1..COLOR_PAIRS-1)"); return NULL; Index: _localemodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_localemodule.c,v retrieving revision 2.33.2.2 retrieving revision 2.33.2.3 diff -u -d -r2.33.2.2 -r2.33.2.3 --- _localemodule.c 7 Jan 2005 07:02:53 -0000 2.33.2.2 +++ _localemodule.c 16 Oct 2005 05:24:04 -0000 2.33.2.3 @@ -426,7 +426,7 @@ /* XXX which one is mac-latin2? */ } if (!name) { - /* This leaks a an object. */ + /* This leaks an object. */ name = CFStringConvertEncodingToIANACharSetName(enc); } return (char *)CFStringGetCStringPtr(name, 0); Index: _randommodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_randommodule.c,v retrieving revision 1.5.4.2 retrieving revision 1.5.4.3 diff -u -d -r1.5.4.2 -r1.5.4.3 --- _randommodule.c 7 Jan 2005 07:02:53 -0000 1.5.4.2 +++ _randommodule.c 16 Oct 2005 05:24:04 -0000 1.5.4.3 @@ -481,6 +481,9 @@ RandomObject *self; PyObject *tmp; + if (!_PyArg_NoKeywords("Random()", kwds)) + return NULL; + self = (RandomObject *)type->tp_alloc(type, 0); if (self == NULL) return NULL; Index: _tkinter.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_tkinter.c,v retrieving revision 1.125.2.2 retrieving revision 1.125.2.3 diff -u -d -r1.125.2.2 -r1.125.2.3 --- _tkinter.c 7 Jan 2005 07:02:54 -0000 1.125.2.2 +++ _tkinter.c 16 Oct 2005 05:24:04 -0000 1.125.2.3 @@ -838,8 +838,10 @@ }; static PyMethodDef PyTclObject_methods[] = { +#ifdef Py_USING_UNICODE {"__unicode__", (PyCFunction)PyTclObject_unicode, METH_NOARGS, PyTclObject_unicode__doc__}, +#endif {0} }; @@ -991,7 +993,7 @@ } } #else - res = PyString_FromStringAndSize(value->bytes, value->length); + result = PyString_FromStringAndSize(value->bytes, value->length); #endif return result; } Index: arraymodule.c 
=================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/arraymodule.c,v retrieving revision 2.75.2.2 retrieving revision 2.75.2.3 diff -u -d -r2.75.2.2 -r2.75.2.3 --- arraymodule.c 7 Jan 2005 07:02:55 -0000 2.75.2.2 +++ arraymodule.c 16 Oct 2005 05:24:04 -0000 2.75.2.3 @@ -1799,18 +1799,9 @@ char c; PyObject *initial = NULL, *it = NULL; struct arraydescr *descr; - - if (kwds != NULL) { - int i = PyObject_Length(kwds); - if (i < 0) - return NULL; - else if (i > 0) { - PyErr_SetString(PyExc_TypeError, - "array.array constructor takes " - "no keyword arguments"); - return NULL; - } - } + + if (!_PyArg_NoKeywords("array.array()", kwds)) + return NULL; if (!PyArg_ParseTuple(args, "c|O:array", &c, &initial)) return NULL; Index: bz2module.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/bz2module.c,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -d -r1.17.2.2 -r1.17.2.3 --- bz2module.c 7 Jan 2005 07:02:56 -0000 1.17.2.2 +++ bz2module.c 16 Oct 2005 05:24:04 -0000 1.17.2.3 @@ -22,6 +22,18 @@ Gustavo Niemeyer \n\ "; +/* Our very own off_t-like type, 64-bit if possible */ +/* copied from Objects/fileobject.c */ +#if !defined(HAVE_LARGEFILE_SUPPORT) +typedef off_t Py_off_t; +#elif SIZEOF_OFF_T >= 8 +typedef off_t Py_off_t; +#elif SIZEOF_FPOS_T >= 8 +typedef fpos_t Py_off_t; +#else +#error "Large file support, but neither off_t nor fpos_t is large enough." 
+#endif + #define BUF(v) PyString_AS_STRING((PyStringObject *)v) #define MODE_CLOSED 0 @@ -98,8 +110,8 @@ BZFILE *fp; int mode; - long pos; - long size; + Py_off_t pos; + Py_off_t size; #ifdef WITH_THREAD PyThread_type_lock lock; #endif @@ -405,7 +417,9 @@ Util_DropReadAhead(f); } if (f->mode == MODE_READ_EOF) { - return -1; + f->f_bufptr = f->f_buf; + f->f_bufend = f->f_buf; + return 0; } if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { return -1; @@ -682,13 +696,13 @@ } totalread += nread; p = memchr(buffer+nfilled, '\n', nread); - if (p == NULL) { + if (!shortread && p == NULL) { /* Need a larger buffer to fit this line */ nfilled += nread; buffersize *= 2; if (buffersize > INT_MAX) { PyErr_SetString(PyExc_OverflowError, - "line is longer than a Python string can hold"); + "line is longer than a Python string can hold"); goto error; } if (big_buffer == NULL) { @@ -705,11 +719,11 @@ _PyString_Resize(&big_buffer, buffersize); buffer = PyString_AS_STRING(big_buffer); } - continue; + continue; } end = buffer+nfilled+nread; q = buffer; - do { + while (p != NULL) { /* Process complete lines */ p++; line = PyString_FromStringAndSize(q, p-q); @@ -721,7 +735,7 @@ goto error; q = p; p = memchr(q, '\n', end-q); - } while (p != NULL); + } /* Move the remaining incomplete line to the start */ nfilled = end-q; memmove(buffer, q, nfilled); @@ -962,18 +976,27 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args) { int where = 0; - long offset; + PyObject *offobj; + Py_off_t offset; char small_buffer[SMALLCHUNK]; char *buffer = small_buffer; size_t buffersize = SMALLCHUNK; int bytesread = 0; - int readsize; + size_t readsize; int chunksize; int bzerror; int rewind = 0; PyObject *ret = NULL; - if (!PyArg_ParseTuple(args, "l|i:seek", &offset, &where)) + if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where)) + return NULL; +#if !defined(HAVE_LARGEFILE_SUPPORT) + offset = PyInt_AsLong(offobj); +#else + offset = PyLong_Check(offobj) ? 
+ PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj); +#endif + if (PyErr_Occurred()) return NULL; ACQUIRE_LOCK(self); @@ -1066,10 +1089,13 @@ /* Before getting here, offset must be set to the number of bytes * to walk forward. */ for (;;) { - if ((size_t)offset-bytesread > buffersize) + if (offset-bytesread > buffersize) readsize = buffersize; else - readsize = offset-bytesread; + /* offset might be wider that readsize, but the result + * of the subtraction is bound by buffersize (see the + * condition above). buffersize is 8192. */ + readsize = (size_t)(offset-bytesread); Py_BEGIN_ALLOW_THREADS chunksize = Util_UnivNewlineRead(&bzerror, self->fp, buffer, readsize, self); @@ -1114,7 +1140,11 @@ goto cleanup; } +#if !defined(HAVE_LARGEFILE_SUPPORT) ret = PyInt_FromLong(self->pos); +#else + ret = PyLong_FromLongLong(self->pos); +#endif cleanup: return ret; @@ -1308,6 +1338,10 @@ break; } + if (mode_char == 0) { + mode_char = 'r'; + } + mode = (mode_char == 'r') ? "rb" : "wb"; self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)", Index: cStringIO.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/cStringIO.c,v retrieving revision 2.36.2.2 retrieving revision 2.36.2.3 diff -u -d -r2.36.2.2 -r2.36.2.3 --- cStringIO.c 7 Jan 2005 07:02:57 -0000 2.36.2.2 +++ cStringIO.c 16 Oct 2005 05:24:04 -0000 2.36.2.3 @@ -241,7 +241,10 @@ line = PyString_FromStringAndSize (output, n); if (!line) goto err; - PyList_Append (result, line); + if (PyList_Append (result, line) == -1) { + Py_DECREF (line); + goto err; + } Py_DECREF (line); length += n; if (hint > 0 && length >= hint) @@ -440,13 +443,18 @@ Py_DECREF(it); Py_DECREF(s); return NULL; - } - Py_DECREF(s); - } - Py_DECREF(it); - Py_RETURN_NONE; -} + } + Py_DECREF(s); + } + Py_DECREF(it); + + /* See if PyIter_Next failed */ + if (PyErr_Occurred()) + return NULL; + + Py_RETURN_NONE; +} static struct PyMethodDef O_methods[] = { /* Common methods: */ 
{"flush", (PyCFunction)IO_flush, METH_NOARGS, IO_flush__doc__}, Index: collectionsmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/collectionsmodule.c,v retrieving revision 1.36.4.1 retrieving revision 1.36.4.2 diff -u -d -r1.36.4.1 -r1.36.4.2 --- collectionsmodule.c 7 Jan 2005 07:02:57 -0000 1.36.4.1 +++ collectionsmodule.c 16 Oct 2005 05:24:04 -0000 1.36.4.2 @@ -95,6 +95,9 @@ dequeobject *deque; block *b; + if (!_PyArg_NoKeywords("deque()", kwds)) + return NULL; + /* create dequeobject structure */ deque = (dequeobject *)type->tp_alloc(type, 0); if (deque == NULL) @@ -368,6 +371,41 @@ return deque->len; } +static PyObject * +deque_remove(dequeobject *deque, PyObject *value) +{ + int i, n=deque->len; + + for (i=0 ; i<n ; i++) { + PyObject *item = deque->leftblock->data[deque->leftindex]; + int cmp = PyObject_RichCompareBool(item, value, Py_EQ); + + if (deque->len != n) { + PyErr_SetString(PyExc_IndexError, + "deque mutated during remove()."); + return NULL; + } + if (cmp > 0) { + PyObject *tgt = deque_popleft(deque, NULL); + assert (tgt != NULL); + Py_DECREF(tgt); + if (_deque_rotate(deque, i) == -1) + return NULL; + Py_RETURN_NONE; + } + else if (cmp < 0) { + _deque_rotate(deque, i); + return NULL; + } + _deque_rotate(deque, -1); + } + PyErr_SetString(PyExc_ValueError, "deque.remove(x): x not in deque"); + return NULL; +} + +PyDoc_STRVAR(remove_doc, +"D.remove(value) -- remove first occurrence of value."); + static int deque_clear(dequeobject *deque) { @@ -764,7 +802,7 @@ METH_NOARGS, copy_doc}, {"extend", (PyCFunction)deque_extend, METH_O, extend_doc}, - {"extendleft", (PyCFunction)deque_extendleft, + {"extendleft", (PyCFunction)deque_extendleft, METH_O, extendleft_doc}, {"pop", (PyCFunction)deque_pop, METH_NOARGS, pop_doc}, @@ -772,6 +810,8 @@ METH_NOARGS, popleft_doc}, {"__reduce__", (PyCFunction)deque_reduce, METH_NOARGS, reduce_doc}, + {"remove", (PyCFunction)deque_remove, + METH_O, remove_doc}, {"__reversed__", 
(PyCFunction)deque_reviter, METH_NOARGS, reversed_doc}, {"rotate", (PyCFunction)deque_rotate, @@ -895,15 +935,17 @@ return item; } -static int +static PyObject * dequeiter_len(dequeiterobject *it) { - return it->counter; + return PyInt_FromLong(it->counter); } -static PySequenceMethods dequeiter_as_sequence = { - (inquiry)dequeiter_len, /* sq_length */ - 0, /* sq_concat */ +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef dequeiter_methods[] = { + {"_length_cue", (PyCFunction)dequeiter_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ }; PyTypeObject dequeiter_type = { @@ -920,7 +962,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &dequeiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -936,6 +978,7 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)dequeiter_next, /* tp_iternext */ + dequeiter_methods, /* tp_methods */ 0, }; @@ -1002,7 +1045,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - &dequeiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -1018,6 +1061,7 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)dequereviter_next, /* tp_iternext */ + dequeiter_methods, /* tp_methods */ 0, }; Index: datetimemodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/datetimemodule.c,v retrieving revision 1.60.4.2 retrieving revision 1.60.4.3 diff -u -d -r1.60.4.2 -r1.60.4.3 --- datetimemodule.c 7 Jan 2005 07:02:57 -0000 1.60.4.2 +++ datetimemodule.c 16 Oct 2005 05:24:04 -0000 1.60.4.3 @@ -3798,6 +3798,46 @@ return result; } +/* Return new datetime from time.strptime(). 
*/ +static PyObject * +datetime_strptime(PyObject *cls, PyObject *args) +{ + PyObject *result = NULL, *obj, *module; + const char *string, *format; + + if (!PyArg_ParseTuple(args, "ss:strptime", &string, &format)) + return NULL; + + if ((module = PyImport_ImportModule("time")) == NULL) + return NULL; + obj = PyObject_CallMethod(module, "strptime", "ss", string, format); + Py_DECREF(module); + + if (obj != NULL) { + int i, good_timetuple = 1; + long int ia[6]; + if (PySequence_Check(obj) && PySequence_Size(obj) >= 6) + for (i=0; i < 6; i++) { + PyObject *p = PySequence_GetItem(obj, i); + if (PyInt_Check(p)) + ia[i] = PyInt_AsLong(p); + else + good_timetuple = 0; + Py_DECREF(p); + } + else + good_timetuple = 0; + if (good_timetuple) + result = PyObject_CallFunction(cls, "iiiiii", + ia[0], ia[1], ia[2], ia[3], ia[4], ia[5]); + else + PyErr_SetString(PyExc_ValueError, + "unexpected value from time.strptime"); + Py_DECREF(obj); + } + return result; +} + /* Return new datetime from date/datetime and time arguments. 
*/ static PyObject * datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) @@ -4419,6 +4459,11 @@ PyDoc_STR("timestamp -> UTC datetime from a POSIX timestamp " "(like time.time()).")}, + {"strptime", (PyCFunction)datetime_strptime, + METH_VARARGS | METH_CLASS, + PyDoc_STR("string, format -> new datetime parsed from a string " + "(like time.strptime()).")}, + {"combine", (PyCFunction)datetime_combine, METH_VARARGS | METH_KEYWORDS | METH_CLASS, PyDoc_STR("date, time -> datetime with same date and time fields")}, Index: fcntlmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/fcntlmodule.c,v retrieving revision 2.35.2.2 retrieving revision 2.35.2.3 diff -u -d -r2.35.2.2 -r2.35.2.3 --- fcntlmodule.c 7 Jan 2005 07:02:58 -0000 2.35.2.2 +++ fcntlmodule.c 16 Oct 2005 05:24:04 -0000 2.35.2.3 @@ -102,7 +102,7 @@ int mutate_arg = 1; char buf[1024]; - if (PyArg_ParseTuple(args, "O&iw#|i:ioctl", + if (PyArg_ParseTuple(args, "O&Iw#|i:ioctl", conv_descriptor, &fd, &code, &str, &len, &mutate_arg)) { char *arg; @@ -151,7 +151,7 @@ } PyErr_Clear(); - if (PyArg_ParseTuple(args, "O&is#:ioctl", + if (PyArg_ParseTuple(args, "O&Is#:ioctl", conv_descriptor, &fd, &code, &str, &len)) { if (len > sizeof buf) { PyErr_SetString(PyExc_ValueError, @@ -172,8 +172,8 @@ PyErr_Clear(); arg = 0; if (!PyArg_ParseTuple(args, - "O&i|i;ioctl requires a file or file descriptor," - " an integer and optionally a integer or buffer argument", + "O&I|i;ioctl requires a file or file descriptor," + " an integer and optionally an integer or buffer argument", conv_descriptor, &fd, &code, &arg)) { return NULL; } Index: gcmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/gcmodule.c,v retrieving revision 2.52.2.2 retrieving revision 2.52.2.3 diff -u -d -r2.52.2.2 -r2.52.2.3 --- gcmodule.c 7 Jan 2005 07:02:59 -0000 2.52.2.2 +++ gcmodule.c 16 Oct 2005 05:24:04 
-0000 2.52.2.3 @@ -413,10 +413,8 @@ assert(delstr != NULL); return _PyInstance_Lookup(op, delstr) != NULL; } - else if (PyType_HasFeature(op->ob_type, Py_TPFLAGS_HEAPTYPE)) + else return op->ob_type->tp_del != NULL; - else - return 0; } /* Move the objects in unreachable with __del__ methods into `finalizers`. @@ -1166,6 +1164,7 @@ if (garbage == NULL) return; } + Py_INCREF(garbage); if (PyModule_AddObject(m, "garbage", garbage) < 0) return; #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return Index: getpath.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/getpath.c,v retrieving revision 1.41.2.2 retrieving revision 1.41.2.3 diff -u -d -r1.41.2.2 -r1.41.2.3 --- getpath.c 7 Jan 2005 07:03:00 -0000 1.41.2.2 +++ getpath.c 16 Oct 2005 05:24:04 -0000 1.41.2.3 @@ -381,7 +381,7 @@ NSModule pythonModule; #endif #ifdef __APPLE__ - unsigned long nsexeclength = MAXPATHLEN; + uint32_t nsexeclength = MAXPATHLEN; #endif /* If there is no slash in the argv0 path, then we have to Index: grpmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/grpmodule.c,v retrieving revision 2.18.2.2 retrieving revision 2.18.2.3 diff -u -d -r2.18.2.2 -r2.18.2.3 --- grpmodule.c 7 Jan 2005 07:03:00 -0000 2.18.2.2 +++ grpmodule.c 16 Oct 2005 05:24:04 -0000 2.18.2.3 @@ -85,9 +85,9 @@ static PyObject * grp_getgrgid(PyObject *self, PyObject *args) { - int gid; + unsigned int gid; struct group *p; - if (!PyArg_ParseTuple(args, "i:getgrgid", &gid)) + if (!PyArg_ParseTuple(args, "I:getgrgid", &gid)) return NULL; if ((p = getgrgid(gid)) == NULL) { PyErr_Format(PyExc_KeyError, "getgrgid(): gid not found: %d", gid); Index: itertoolsmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/itertoolsmodule.c,v retrieving revision 1.10.4.2 retrieving revision 1.10.4.3 diff -u 
-d -r1.10.4.2 -r1.10.4.3 --- itertoolsmodule.c 7 Jan 2005 07:03:00 -0000 1.10.4.2 +++ itertoolsmodule.c 16 Oct 2005 05:24:04 -0000 1.10.4.3 @@ -618,6 +618,9 @@ PyObject *saved; cycleobject *lz; + if (!_PyArg_NoKeywords("cycle()", kwds)) + return NULL; + if (!PyArg_UnpackTuple(args, "cycle", 1, 1, &iterable)) return NULL; @@ -765,6 +768,9 @@ PyObject *it; dropwhileobject *lz; + if (!_PyArg_NoKeywords("dropwhile()", kwds)) + return NULL; + if (!PyArg_UnpackTuple(args, "dropwhile", 2, 2, &func, &seq)) return NULL; @@ -906,6 +912,9 @@ PyObject *it; takewhileobject *lz; + if (!_PyArg_NoKeywords("takewhile()", kwds)) + return NULL; + if (!PyArg_UnpackTuple(args, "takewhile", 2, 2, &func, &seq)) return NULL; @@ -1048,6 +1057,9 @@ int numargs; isliceobject *lz; + if (!_PyArg_NoKeywords("islice()", kwds)) + return NULL; + if (!PyArg_UnpackTuple(args, "islice", 2, 4, &seq, &a1, &a2, &a3)) return NULL; @@ -1236,6 +1248,9 @@ PyObject *it; starmapobject *lz; + if (!_PyArg_NoKeywords("starmap()", kwds)) + return NULL; + if (!PyArg_UnpackTuple(args, "starmap", 2, 2, &func, &seq)) return NULL; @@ -1365,6 +1380,9 @@ imapobject *lz; int numargs, i; + if (!_PyArg_NoKeywords("imap()", kwds)) + return NULL; + numargs = PyTuple_Size(args); if (numargs < 2) { PyErr_SetString(PyExc_TypeError, @@ -1544,6 +1562,9 @@ int i; PyObject *ittuple; + if (!_PyArg_NoKeywords("chain()", kwds)) + return NULL; + /* obtain iterators */ assert(PyTuple_Check(args)); ittuple = PyTuple_New(tuplesize); @@ -1684,6 +1705,9 @@ PyObject *it; ifilterobject *lz; + if (!_PyArg_NoKeywords("ifilter()", kwds)) + return NULL; + if (!PyArg_UnpackTuple(args, "ifilter", 2, 2, &func, &seq)) return NULL; @@ -1825,6 +1849,9 @@ PyObject *it; ifilterfalseobject *lz; + if (!_PyArg_NoKeywords("ifilterfalse()", kwds)) + return NULL; + if (!PyArg_UnpackTuple(args, "ifilterfalse", 2, 2, &func, &seq)) return NULL; @@ -1964,6 +1991,9 @@ countobject *lz; long cnt = 0; + if (!_PyArg_NoKeywords("count()", kwds)) + return NULL; + if 
(!PyArg_ParseTuple(args, "|l:count", &cnt)) return NULL; @@ -2060,6 +2090,9 @@ PyObject *result; int tuplesize = PySequence_Length(args); + if (!_PyArg_NoKeywords("izip()", kwds)) + return NULL; + /* args must be a tuple */ assert(PyTuple_Check(args)); @@ -2240,6 +2273,9 @@ PyObject *element; long cnt = -1; + if (!_PyArg_NoKeywords("repeat()", kwds)) + return NULL; + if (!PyArg_ParseTuple(args, "O|l:repeat", &element, &cnt)) return NULL; @@ -2300,17 +2336,21 @@ return result; } -static int +static PyObject * repeat_len(repeatobject *ro) { - if (ro->cnt == -1) + if (ro->cnt == -1) { PyErr_SetString(PyExc_TypeError, "len() of unsized object"); - return (int)(ro->cnt); + return NULL; + } + return PyInt_FromLong(ro->cnt); } -static PySequenceMethods repeat_as_sequence = { - (inquiry)repeat_len, /* sq_length */ - 0, /* sq_concat */ +PyDoc_STRVAR(length_cue_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef repeat_methods[] = { + {"_length_cue", (PyCFunction)repeat_len, METH_NOARGS, length_cue_doc}, + {NULL, NULL} /* sentinel */ }; PyDoc_STRVAR(repeat_doc, @@ -2332,7 +2372,7 @@ 0, /* tp_compare */ (reprfunc)repeat_repr, /* tp_repr */ 0, /* tp_as_number */ - &repeat_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ @@ -2349,7 +2389,7 @@ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)repeat_next, /* tp_iternext */ - 0, /* tp_methods */ + repeat_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ Index: ld_so_aix =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/ld_so_aix,v retrieving revision 2.5 retrieving revision 2.5.32.1 diff -u -d -r2.5 -r2.5.32.1 --- ld_so_aix 3 Sep 1997 00:45:30 -0000 2.5 +++ ld_so_aix 16 Oct 2005 05:24:04 -0000 2.5.32.1 @@ -168,6 +168,10 @@ CCOPT="-Wl,-e$entry -Wl,-bE:$expfile -Wl,-bI:$impfile -Wl,-bhalt:4" CCOPT="$CCOPT 
-Wl,-bM:SRE -Wl,-T512 -Wl,-H512 -lm -o $objfile" +# Note: to use dynamic libraries like libtcl8.4.so and libtk8.4.so +# you may need to replace the second CCOPT line above with the following: +# CCOPT="$CCOPT -Wl,-bM:SRE -Wl,-T512 -Wl,-H512 -brtl -bnortllib -lm -o $objfile" + CCARGS="$args" # Export list generation. Index: main.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/main.c,v retrieving revision 1.64.2.3 retrieving revision 1.64.2.4 diff -u -d -r1.64.2.3 -r1.64.2.4 --- main.c 7 Jan 2005 07:03:00 -0000 1.64.2.3 +++ main.c 16 Oct 2005 05:24:04 -0000 1.64.2.4 @@ -359,6 +359,14 @@ } } } + { + /* XXX: does this work on Win/Win64? (see posix_fstat) */ + struct stat sb; + if (fstat(fileno(fp), &sb) == 0 && + S_ISDIR(sb.st_mode)) { + fprintf(stderr, "%s: warning '%s' is a directory\n", argv[0], filename); + } + } } } Index: makexp_aix =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/makexp_aix,v retrieving revision 2.2 retrieving revision 2.2.32.1 diff -u -d -r2.2 -r2.2.32.1 --- makexp_aix 9 Apr 1998 21:46:02 -0000 2.2 +++ makexp_aix 16 Oct 2005 05:24:04 -0000 2.2.32.1 @@ -70,6 +70,12 @@ # left with just the symbol name. # 7. Eliminate all entries containing two colons, like Class::method # -/usr/ccs/bin/nm -Bex $inputFiles \ + +# Use -X32_64 if it appears to be implemented in this version of 'nm'. 
+NM=/usr/ccs/bin/nm +xopt=-X32_64 +$NM -e $xopt $1 >/dev/null 2>&1 || xopt="" + +$NM -Bex $xopt $inputFiles \ | sed -e '/ [^BDT] /d' -e '/\./d' -e 's/.* [BDT] //' -e '/::/d' \ | sort | uniq >> $expFileName Index: mathmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/mathmodule.c,v retrieving revision 2.68.2.2 retrieving revision 2.68.2.3 diff -u -d -r2.68.2.2 -r2.68.2.3 --- mathmodule.c 7 Jan 2005 07:03:00 -0000 2.68.2.2 +++ mathmodule.c 16 Oct 2005 05:24:04 -0000 2.68.2.3 @@ -1,7 +1,7 @@ /* Math module -- standard C math library functions, pi and e */ #include "Python.h" -#include "longintrepr.h" +#include "longintrepr.h" /* just for SHIFT */ #ifndef _MSC_VER #ifndef __STDC__ Index: md5module.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/md5module.c,v retrieving revision 2.30.2.2 retrieving revision 2.30.2.3 diff -u -d -r2.30.2.2 -r2.30.2.3 --- md5module.c 7 Jan 2005 07:03:00 -0000 2.30.2.2 +++ md5module.c 16 Oct 2005 05:24:04 -0000 2.30.2.3 @@ -10,6 +10,7 @@ /* MD5 objects */ #include "Python.h" +#include "structmember.h" #include "md5.h" typedef struct { @@ -150,15 +151,46 @@ }; static PyObject * -md5_getattr(md5object *self, char *name) +md5_get_block_size(PyObject *self, void *closure) { - if (strcmp(name, "digest_size") == 0) { - return PyInt_FromLong(16); - } + return PyInt_FromLong(64); +} - return Py_FindMethod(md5_methods, (PyObject *)self, name); +static PyObject * +md5_get_digest_size(PyObject *self, void *closure) +{ + return PyInt_FromLong(16); +} + +static PyObject * +md5_get_name(PyObject *self, void *closure) +{ + return PyString_FromStringAndSize("MD5", 3); } +static PyGetSetDef md5_getseters[] = { + {"digest_size", + (getter)md5_get_digest_size, NULL, + NULL, + NULL}, + {"block_size", + (getter)md5_get_block_size, NULL, + NULL, + NULL}, + {"name", + (getter)md5_get_name, NULL, + NULL, + NULL}, + /* 
the old md5 and sha modules support 'digest_size' as in PEP 247. + * the old sha module also supported 'digestsize'. ugh. */ + {"digestsize", + (getter)md5_get_digest_size, NULL, + NULL, + NULL}, + {NULL} /* Sentinel */ +}; + + PyDoc_STRVAR(module_doc, "This module implements the interface to RSA's MD5 message digest\n\ algorithm (see also Internet RFC 1321). Its use is quite\n\ @@ -191,13 +223,13 @@ static PyTypeObject MD5type = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ - "md5.md5", /*tp_name*/ + "_md5.md5", /*tp_name*/ sizeof(md5object), /*tp_size*/ 0, /*tp_itemsize*/ /* methods */ (destructor)md5_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ - (getattrfunc)md5_getattr, /*tp_getattr*/ + 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ @@ -210,8 +242,17 @@ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ - 0, /*tp_xxx4*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ md5type_doc, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + md5_methods, /*tp_methods*/ + 0, /*tp_members*/ + md5_getseters, /*tp_getset*/ }; @@ -247,7 +288,6 @@ static PyMethodDef md5_functions[] = { {"new", (PyCFunction)MD5_new, METH_VARARGS, new_doc}, - {"md5", (PyCFunction)MD5_new, METH_VARARGS, new_doc}, /* Backward compatibility */ {NULL, NULL} /* Sentinel */ }; @@ -255,12 +295,14 @@ /* Initialize this module. 
*/ PyMODINIT_FUNC -initmd5(void) +init_md5(void) { PyObject *m, *d; MD5type.ob_type = &PyType_Type; - m = Py_InitModule3("md5", md5_functions, module_doc); + if (PyType_Ready(&MD5type) < 0) + return; + m = Py_InitModule3("_md5", md5_functions, module_doc); d = PyModule_GetDict(m); PyDict_SetItemString(d, "MD5Type", (PyObject *)&MD5type); PyModule_AddIntConstant(m, "digest_size", 16); Index: mmapmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/mmapmodule.c,v retrieving revision 2.39.2.2 retrieving revision 2.39.2.3 diff -u -d -r2.39.2.2 -r2.39.2.3 --- mmapmodule.c 7 Jan 2005 07:03:00 -0000 2.39.2.2 +++ mmapmodule.c 16 Oct 2005 05:24:04 -0000 2.39.2.3 @@ -423,6 +423,11 @@ } else { void *newmap; + if (ftruncate(self->fd, new_size) == -1) { + PyErr_SetFromErrno(mmap_module_error); + return NULL; + } + #ifdef MREMAP_MAYMOVE newmap = mremap(self->data, self->size, new_size, MREMAP_MAYMOVE); #else @@ -896,18 +901,26 @@ /* on OpenVMS we must ensure that all bytes are written to the file */ fsync(fd); # endif - if (fstat(fd, &st) == 0 && S_ISREG(st.st_mode) && - (size_t)map_size > st.st_size) { - PyErr_SetString(PyExc_ValueError, - "mmap length is greater than file size"); - return NULL; + if (fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) { + if (map_size == 0) { + map_size = (int)st.st_size; + } else if ((size_t)map_size > st.st_size) { + PyErr_SetString(PyExc_ValueError, + "mmap length is greater than file size"); + return NULL; + } } #endif m_obj = PyObject_New (mmap_object, &mmap_object_type); if (m_obj == NULL) {return NULL;} m_obj->size = (size_t) map_size; m_obj->pos = (size_t) 0; - m_obj->fd = fd; + m_obj->fd = dup(fd); + if (m_obj->fd == -1) { + Py_DECREF(m_obj); + PyErr_SetFromErrno(mmap_module_error); + return NULL; + } m_obj->data = mmap(NULL, map_size, prot, flags, fd, 0); Index: operator.c =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Modules/operator.c,v retrieving revision 2.21.2.2 retrieving revision 2.21.2.3 diff -u -d -r2.21.2.2 -r2.21.2.3 --- operator.c 7 Jan 2005 07:03:00 -0000 2.21.2.2 +++ operator.c 16 Oct 2005 05:24:04 -0000 2.21.2.3 @@ -256,6 +256,7 @@ typedef struct { PyObject_HEAD + int nitems; PyObject *item; } itemgetterobject; @@ -266,10 +267,18 @@ { itemgetterobject *ig; PyObject *item; + int nitems; - if (!PyArg_UnpackTuple(args, "itemgetter", 1, 1, &item)) + if (!_PyArg_NoKeywords("itemgetter()", kwds)) return NULL; + nitems = PyTuple_GET_SIZE(args); + if (nitems <= 1) { + if (!PyArg_UnpackTuple(args, "itemgetter", 1, 1, &item)) + return NULL; + } else + item = args; + /* create itemgetterobject structure */ ig = PyObject_GC_New(itemgetterobject, &itemgetter_type); if (ig == NULL) @@ -277,6 +286,7 @@ Py_INCREF(item); ig->item = item; + ig->nitems = nitems; PyObject_GC_Track(ig); return (PyObject *)ig; @@ -301,18 +311,40 @@ static PyObject * itemgetter_call(itemgetterobject *ig, PyObject *args, PyObject *kw) { - PyObject * obj; + PyObject *obj, *result; + int i, nitems=ig->nitems; if (!PyArg_UnpackTuple(args, "itemgetter", 1, 1, &obj)) return NULL; - return PyObject_GetItem(obj, ig->item); + if (nitems == 1) + return PyObject_GetItem(obj, ig->item); + + assert(PyTuple_Check(ig->item)); + assert(PyTuple_GET_SIZE(ig->item) == nitems); + + result = PyTuple_New(nitems); + if (result == NULL) + return NULL; + + for (i=0 ; i < nitems ; i++) { + PyObject *item, *val; + item = PyTuple_GET_ITEM(ig->item, i); + val = PyObject_GetItem(obj, item); + if (val == NULL) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, i, val); + } + return result; } PyDoc_STRVAR(itemgetter_doc, -"itemgetter(item) --> itemgetter object\n\ +"itemgetter(item, ...) 
--> itemgetter object\n\ \n\ -Return a callable object that fetches the given item from its operand.\n\ -After, f=itemgetter(2), the call f(b) returns b[2]."); +Return a callable object that fetches the given item(s) from its operand.\n\ +After, f=itemgetter(2), the call f(r) returns r[2].\n\ +After, g=itemgetter(2,5,3), the call g(r) returns (r[2], r[5], r[3])"); static PyTypeObject itemgetter_type = { PyObject_HEAD_INIT(NULL) @@ -363,6 +395,7 @@ typedef struct { PyObject_HEAD + int nattrs; PyObject *attr; } attrgetterobject; @@ -373,10 +406,18 @@ { attrgetterobject *ag; PyObject *attr; + int nattrs; - if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &attr)) + if (!_PyArg_NoKeywords("attrgetter()", kwds)) return NULL; + nattrs = PyTuple_GET_SIZE(args); + if (nattrs <= 1) { + if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &attr)) + return NULL; + } else + attr = args; + /* create attrgetterobject structure */ ag = PyObject_GC_New(attrgetterobject, &attrgetter_type); if (ag == NULL) @@ -384,6 +425,7 @@ Py_INCREF(attr); ag->attr = attr; + ag->nattrs = nattrs; PyObject_GC_Track(ag); return (PyObject *)ag; @@ -408,18 +450,40 @@ static PyObject * attrgetter_call(attrgetterobject *ag, PyObject *args, PyObject *kw) { - PyObject * obj; + PyObject *obj, *result; + int i, nattrs=ag->nattrs; if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &obj)) return NULL; - return PyObject_GetAttr(obj, ag->attr); + if (ag->nattrs == 1) + return PyObject_GetAttr(obj, ag->attr); + + assert(PyTuple_Check(ag->attr)); + assert(PyTuple_GET_SIZE(ag->attr) == nattrs); + + result = PyTuple_New(nattrs); + if (result == NULL) + return NULL; + + for (i=0 ; i < nattrs ; i++) { + PyObject *attr, *val; + attr = PyTuple_GET_ITEM(ag->attr, i); + val = PyObject_GetAttr(obj, attr); + if (val == NULL) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, i, val); + } + return result; } PyDoc_STRVAR(attrgetter_doc, -"attrgetter(attr) --> attrgetter object\n\ +"attrgetter(attr, ...) 
--> attrgetter object\n\ \n\ -Return a callable object that fetches the given attribute from its operand.\n\ -After, f=attrgetter('name'), the call f(b) returns b.name."); +Return a callable object that fetches the given attribute(s) from its operand.\n\ +After, f=attrgetter('name'), the call f(r) returns r.name.\n\ +After, g=attrgetter('name', 'date'), the call g(r) returns (r.name, r.date)."); static PyTypeObject attrgetter_type = { PyObject_HEAD_INIT(NULL) Index: ossaudiodev.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/ossaudiodev.c,v retrieving revision 1.26.4.2 retrieving revision 1.26.4.3 diff -u -d -r1.26.4.2 -r1.26.4.3 --- ossaudiodev.c 7 Jan 2005 07:03:00 -0000 1.26.4.2 +++ ossaudiodev.c 16 Oct 2005 05:24:04 -0000 1.26.4.3 @@ -46,11 +46,12 @@ typedef struct { PyObject_HEAD; - int fd; /* The open file */ - int mode; /* file mode */ - int icount; /* Input count */ - int ocount; /* Output count */ - uint32_t afmts; /* Audio formats supported by hardware */ + char *devicename; /* name of the device file */ + int fd; /* file descriptor */ + int mode; /* file mode (O_RDONLY, etc.) */ + int icount; /* input count */ + int ocount; /* output count */ + uint32_t afmts; /* audio formats supported by hardware */ } oss_audio_t; typedef struct { @@ -74,7 +75,7 @@ { oss_audio_t *self; int fd, afmts, imode; - char *basedev = NULL; + char *devicename = NULL; char *mode = NULL; /* Two ways to call open(): @@ -82,11 +83,11 @@ open(mode) (for backwards compatibility) because the *first* argument is optional, parsing args is a wee bit tricky. 
*/ - if (!PyArg_ParseTuple(arg, "s|s:open", &basedev, &mode)) + if (!PyArg_ParseTuple(arg, "s|s:open", &devicename, &mode)) return NULL; if (mode == NULL) { /* only one arg supplied */ - mode = basedev; - basedev = NULL; + mode = devicename; + devicename = NULL; } if (strcmp(mode, "r") == 0) @@ -102,18 +103,18 @@ /* Open the correct device: either the 'device' argument, or the AUDIODEV environment variable, or "/dev/dsp". */ - if (basedev == NULL) { /* called with one arg */ - basedev = getenv("AUDIODEV"); - if (basedev == NULL) /* $AUDIODEV not set */ - basedev = "/dev/dsp"; + if (devicename == NULL) { /* called with one arg */ + devicename = getenv("AUDIODEV"); + if (devicename == NULL) /* $AUDIODEV not set */ + devicename = "/dev/dsp"; } /* Open with O_NONBLOCK to avoid hanging on devices that only allow one open at a time. This does *not* affect later I/O; OSS provides a special ioctl() for non-blocking read/write, which is exposed via oss_nonblock() below. */ - if ((fd = open(basedev, imode|O_NONBLOCK)) == -1) { - PyErr_SetFromErrnoWithFilename(PyExc_IOError, basedev); + if ((fd = open(devicename, imode|O_NONBLOCK)) == -1) { + PyErr_SetFromErrnoWithFilename(PyExc_IOError, devicename); return NULL; } @@ -121,12 +122,12 @@ expected write() semantics. 
*/ if (fcntl(fd, F_SETFL, 0) == -1) { close(fd); - PyErr_SetFromErrnoWithFilename(PyExc_IOError, basedev); + PyErr_SetFromErrnoWithFilename(PyExc_IOError, devicename); return NULL; } if (ioctl(fd, SNDCTL_DSP_GETFMTS, &afmts) == -1) { - PyErr_SetFromErrnoWithFilename(PyExc_IOError, basedev); + PyErr_SetFromErrnoWithFilename(PyExc_IOError, devicename); return NULL; } /* Create and initialize the object */ @@ -134,6 +135,7 @@ close(fd); return NULL; } + self->devicename = devicename; self->fd = fd; self->mode = imode; self->icount = self->ocount = 0; @@ -158,22 +160,22 @@ static oss_mixer_t * newossmixerobject(PyObject *arg) { - char *basedev = NULL; + char *devicename = NULL; int fd; oss_mixer_t *self; - if (!PyArg_ParseTuple(arg, "|s", &basedev)) { + if (!PyArg_ParseTuple(arg, "|s", &devicename)) { return NULL; } - if (basedev == NULL) { - basedev = getenv("MIXERDEV"); - if (basedev == NULL) /* MIXERDEV not set */ - basedev = "/dev/mixer"; + if (devicename == NULL) { + devicename = getenv("MIXERDEV"); + if (devicename == NULL) /* MIXERDEV not set */ + devicename = "/dev/mixer"; } - if ((fd = open(basedev, O_RDWR)) == -1) { - PyErr_SetFromErrnoWithFilename(PyExc_IOError, basedev); + if ((fd = open(devicename, O_RDWR)) == -1) { + PyErr_SetFromErrnoWithFilename(PyExc_IOError, devicename); return NULL; } @@ -827,7 +829,33 @@ static PyObject * oss_getattr(oss_audio_t *self, char *name) { - return Py_FindMethod(oss_methods, (PyObject *)self, name); + PyObject * rval = NULL; + if (strcmp(name, "closed") == 0) { + rval = (self->fd == -1) ? Py_True : Py_False; + Py_INCREF(rval); + } + else if (strcmp(name, "name") == 0) { + rval = PyString_FromString(self->devicename); + } + else if (strcmp(name, "mode") == 0) { + /* No need for a "default" in this switch: from newossobject(), + self->mode can only be one of these three values. 
*/ + switch(self->mode) { + case O_RDONLY: + rval = PyString_FromString("r"); + break; + case O_RDWR: + rval = PyString_FromString("rw"); + break; + case O_WRONLY: + rval = PyString_FromString("w"); + break; + } + } + else { + rval = Py_FindMethod(oss_methods, (PyObject *)self, name); + } + return rval; } static PyObject * @@ -969,6 +997,18 @@ #ifdef AFMT_S16_NE _EXPORT_INT(m, AFMT_S16_NE); #endif +#ifdef AFMT_U16_NE + _EXPORT_INT(m, AFMT_U16_NE); +#endif +#ifdef AFMT_S32_LE + _EXPORT_INT(m, AFMT_S32_LE); +#endif +#ifdef AFMT_S32_BE + _EXPORT_INT(m, AFMT_S32_BE); +#endif +#ifdef AFMT_MPEG + _EXPORT_INT(m, AFMT_MPEG); +#endif /* Expose the sound mixer device numbers. */ _EXPORT_INT(m, SOUND_MIXER_NRDEVICES); Index: parsermodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/parsermodule.c,v retrieving revision 2.70.2.2 retrieving revision 2.70.2.3 diff -u -d -r2.70.2.2 -r2.70.2.3 --- parsermodule.c 7 Jan 2005 07:03:01 -0000 2.70.2.2 +++ parsermodule.c 16 Oct 2005 05:24:04 -0000 2.70.2.3 @@ -859,7 +859,8 @@ VALIDATER(listmaker); VALIDATER(yield_stmt); VALIDATER(testlist1); VALIDATER(gen_for); VALIDATER(gen_iter); VALIDATER(gen_if); -VALIDATER(testlist_gexp); +VALIDATER(testlist_gexp); VALIDATER(yield_expr); +VALIDATER(yield_or_testlist); #undef VALIDATER @@ -947,7 +948,8 @@ validate_class(node *tree) { int nch = NCH(tree); - int res = validate_ntype(tree, classdef) && ((nch == 4) || (nch == 7)); + int res = (validate_ntype(tree, classdef) && + ((nch == 4) || (nch == 6) || (nch == 7))); if (res) { res = (validate_name(CHILD(tree, 0), "class") @@ -955,12 +957,20 @@ && validate_colon(CHILD(tree, nch - 2)) && validate_suite(CHILD(tree, nch - 1))); } - else + else { (void) validate_numnodes(tree, 4, "class"); - if (res && (nch == 7)) { - res = (validate_lparen(CHILD(tree, 2)) - && validate_testlist(CHILD(tree, 3)) - && validate_rparen(CHILD(tree, 4))); + } + + if (res) { + if (nch == 7) { + res = 
((validate_lparen(CHILD(tree, 2)) && + validate_testlist(CHILD(tree, 3)) && + validate_rparen(CHILD(tree, 4)))); + } + else if (nch == 6) { + res = (validate_lparen(CHILD(tree,2)) && + validate_rparen(CHILD(tree,3))); + } } return (res); } @@ -1498,6 +1508,15 @@ static int +validate_yield_or_testlist(node *tree) +{ + if (TYPE(tree) == yield_expr) + return validate_yield_expr(tree); + else + return validate_testlist(tree); +} + +static int validate_expr_stmt(node *tree) { int j; @@ -1508,8 +1527,8 @@ if (res && nch == 3 && TYPE(CHILD(tree, 1)) == augassign) { - res = (validate_numnodes(CHILD(tree, 1), 1, "augassign") - && validate_testlist(CHILD(tree, 2))); + res = validate_numnodes(CHILD(tree, 1), 1, "augassign") + && validate_yield_or_testlist(CHILD(tree, 2)); if (res) { char *s = STR(CHILD(CHILD(tree, 1), 0)); @@ -1532,8 +1551,8 @@ } else { for (j = 1; res && (j < nch); j += 2) - res = (validate_equal(CHILD(tree, j)) - && validate_testlist(CHILD(tree, j + 1))); + res = validate_equal(CHILD(tree, j)) + && validate_yield_or_testlist(CHILD(tree, j + 1)); } return (res); } @@ -1640,15 +1659,31 @@ } -/* yield_stmt: 'yield' testlist +/* yield_expr: 'yield' [testlist] + */ +static int +validate_yield_expr(node *tree) +{ + int nch = NCH(tree); + int res = (validate_ntype(tree, yield_expr) + && ((nch == 1) || (nch == 2)) + && validate_name(CHILD(tree, 0), "yield")); + + if (res && (nch == 2)) + res = validate_testlist(CHILD(tree, 1)); + + return (res); +} + + +/* yield_stmt: yield_expr */ static int validate_yield_stmt(node *tree) { return (validate_ntype(tree, yield_stmt) - && validate_numnodes(tree, 2, "yield_stmt") - && validate_name(CHILD(tree, 0), "yield") - && validate_testlist(CHILD(tree, 1))); + && validate_numnodes(tree, 1, "yield_stmt") + && validate_yield_expr(CHILD(tree, 0))); } @@ -2291,8 +2326,12 @@ res = ((nch <= 3) && (validate_rparen(CHILD(tree, nch - 1)))); - if (res && (nch == 3)) - res = validate_testlist_gexp(CHILD(tree, 1)); + if (res && (nch == 3)) 
{ + if (TYPE(CHILD(tree, 1))==yield_expr) + res = validate_yield_expr(CHILD(tree, 1)); + else + res = validate_testlist_gexp(CHILD(tree, 1)); + } break; case LSQB: if (nch == 2) @@ -2905,6 +2944,9 @@ case testlist: res = validate_testlist(tree); break; + case yield_expr: + res = validate_yield_expr(tree); + break; case testlist1: res = validate_testlist1(tree); break; Index: posixmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/posixmodule.c,v retrieving revision 2.241.2.2 retrieving revision 2.241.2.3 diff -u -d -r2.241.2.2 -r2.241.2.3 --- posixmodule.c 7 Jan 2005 07:03:02 -0000 2.241.2.2 +++ posixmodule.c 16 Oct 2005 05:24:04 -0000 2.241.2.3 @@ -674,8 +674,8 @@ (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime)\n\ or via the attributes st_mode, st_ino, st_dev, st_nlink, st_uid, and so on.\n\ \n\ -Posix/windows: If your platform supports st_blksize, st_blocks, or st_rdev,\n\ -they are available as attributes only.\n\ +Posix/windows: If your platform supports st_blksize, st_blocks, st_rdev,\n\ +or st_flags, they are available as attributes only.\n\ \n\ See os.stat for more information."); @@ -703,6 +703,15 @@ #ifdef HAVE_STRUCT_STAT_ST_RDEV {"st_rdev", "device type (if inode device)"}, #endif +#ifdef HAVE_STRUCT_STAT_ST_FLAGS + {"st_flags", "user defined flags for file"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_GEN + {"st_gen", "generation number"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME + {"st_birthtime", "time of creation"}, +#endif {0} }; @@ -724,6 +733,24 @@ #define ST_RDEV_IDX ST_BLOCKS_IDX #endif +#ifdef HAVE_STRUCT_STAT_ST_FLAGS +#define ST_FLAGS_IDX (ST_RDEV_IDX+1) +#else +#define ST_FLAGS_IDX ST_RDEV_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_GEN +#define ST_GEN_IDX (ST_FLAGS_IDX+1) +#else +#define ST_GEN_IDX ST_FLAGS_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#define ST_BIRTHTIME_IDX (ST_GEN_IDX+1) +#else +#define ST_BIRTHTIME_IDX ST_GEN_IDX +#endif + 
static PyStructSequence_Desc stat_result_desc = { "stat_result", /* name */ stat_result__doc__, /* doc */ @@ -789,7 +816,7 @@ /* If true, st_?time is float. */ -static int _stat_float_times = 0; +static int _stat_float_times = 1; PyDoc_STRVAR(stat_float_times__doc__, "stat_float_times([newval]) -> oldval\n\n\ @@ -869,8 +896,14 @@ mnsec = st.st_mtim.tv_nsec; cnsec = st.st_ctim.tv_nsec; #else +#ifdef HAVE_STAT_TV_NSEC2 + ansec = st.st_atimespec.tv_nsec; + mnsec = st.st_mtimespec.tv_nsec; + cnsec = st.st_ctimespec.tv_nsec; +#else ansec = mnsec = cnsec = 0; #endif +#endif fill_time(v, 7, st.st_atime, ansec); fill_time(v, 8, st.st_mtime, mnsec); fill_time(v, 9, st.st_ctime, cnsec); @@ -887,6 +920,33 @@ PyStructSequence_SET_ITEM(v, ST_RDEV_IDX, PyInt_FromLong((long)st.st_rdev)); #endif +#ifdef HAVE_STRUCT_STAT_ST_GEN + PyStructSequence_SET_ITEM(v, ST_GEN_IDX, + PyInt_FromLong((long)st.st_gen)); +#endif +#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME + { + PyObject *val; + unsigned long bsec,bnsec; + bsec = (long)st.st_birthtime; +#ifdef HAVE_STAT_TV_NSEC2 + bnsec = st.st_birthtimespec.tv_nsec; +#else + bnsec = 0; +#endif + if (_stat_float_times) { + val = PyFloat_FromDouble(bsec + 1e-9*bnsec); + } else { + val = PyInt_FromLong((long)bsec); + } + PyStructSequence_SET_ITEM(v, ST_BIRTHTIME_IDX, + val); + } +#endif +#ifdef HAVE_STRUCT_STAT_ST_FLAGS + PyStructSequence_SET_ITEM(v, ST_FLAGS_IDX, + PyInt_FromLong((long)st.st_flags)); +#endif if (PyErr_Occurred()) { Py_DECREF(v); @@ -1106,19 +1166,21 @@ it is a simple dereference. */ res = _waccess(PyUnicode_AS_UNICODE(po), mode); Py_END_ALLOW_THREADS - return(PyBool_FromLong(res == 0)); + return PyBool_FromLong(res == 0); } /* Drop the argument parsing error as narrow strings are also valid. 
*/ PyErr_Clear(); } #endif - if (!PyArg_ParseTuple(args, "si:access", &path, &mode)) + if (!PyArg_ParseTuple(args, "eti:access", + Py_FileSystemDefaultEncoding, &path, &mode)) return NULL; Py_BEGIN_ALLOW_THREADS res = access(path, mode); Py_END_ALLOW_THREADS - return(PyBool_FromLong(res == 0)); + PyMem_Free(path); + return PyBool_FromLong(res == 0); } #ifndef F_OK @@ -1160,8 +1222,8 @@ ret = ttyname(id); #endif if (ret == NULL) - return(posix_error()); - return(PyString_FromString(ret)); + return posix_error(); + return PyString_FromString(ret); } #endif @@ -1182,8 +1244,8 @@ ret = ctermid(buffer); #endif if (ret == NULL) - return(posix_error()); - return(PyString_FromString(buffer)); + return posix_error(); + return PyString_FromString(buffer); } #endif @@ -1977,6 +2039,8 @@ return -1; intval = PyInt_AsLong(intobj); Py_DECREF(intobj); + if (intval == -1 && PyErr_Occurred()) + return -1; *sec = intval; *usec = (long)((tval - intval) * 1e6); /* can't exceed 1000000 */ if (*usec < 0) @@ -2949,7 +3013,7 @@ static PyObject * posix_forkpty(PyObject *self, PyObject *noargs) { - int master_fd, pid; + int master_fd = -1, pid; pid = forkpty(&master_fd, NULL, NULL, NULL); if (pid == -1) @@ -5349,6 +5413,10 @@ PyObject *buffer; if (!PyArg_ParseTuple(args, "ii:read", &fd, &size)) return NULL; + if (size < 0) { + errno = EINVAL; + return posix_error(); + } buffer = PyString_FromStringAndSize((char *)NULL, size); if (buffer == NULL) return NULL; @@ -7181,13 +7249,18 @@ { char *filepath; HINSTANCE rc; - if (!PyArg_ParseTuple(args, "s:startfile", &filepath)) + if (!PyArg_ParseTuple(args, "et:startfile", + Py_FileSystemDefaultEncoding, &filepath)) return NULL; Py_BEGIN_ALLOW_THREADS rc = ShellExecute((HWND)0, NULL, filepath, NULL, NULL, SW_SHOWNORMAL); Py_END_ALLOW_THREADS - if (rc <= (HINSTANCE)32) - return win32_error("startfile", filepath); + if (rc <= (HINSTANCE)32) { + PyObject *errval = win32_error("startfile", filepath); + PyMem_Free(filepath); + return errval; + } + 
PyMem_Free(filepath); Py_INCREF(Py_None); return Py_None; } @@ -7713,6 +7786,12 @@ #ifdef O_LARGEFILE if (ins(d, "O_LARGEFILE", (long)O_LARGEFILE)) return -1; #endif +#ifdef O_SHLOCK + if (ins(d, "O_SHLOCK", (long)O_SHLOCK)) return -1; +#endif +#ifdef O_EXLOCK + if (ins(d, "O_EXLOCK", (long)O_EXLOCK)) return -1; +#endif /* MS Windows */ #ifdef O_NOINHERIT Index: pwdmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/pwdmodule.c,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- pwdmodule.c 7 Jan 2005 07:03:18 -0000 1.34.2.2 +++ pwdmodule.c 16 Oct 2005 05:24:04 -0000 1.34.2.3 @@ -102,9 +102,9 @@ static PyObject * pwd_getpwuid(PyObject *self, PyObject *args) { - int uid; + unsigned int uid; struct passwd *p; - if (!PyArg_ParseTuple(args, "i:getpwuid", &uid)) + if (!PyArg_ParseTuple(args, "I:getpwuid", &uid)) return NULL; if ((p = getpwuid(uid)) == NULL) { PyErr_Format(PyExc_KeyError, Index: pyexpat.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/pyexpat.c,v retrieving revision 2.67.2.2 retrieving revision 2.67.2.3 diff -u -d -r2.67.2.2 -r2.67.2.3 --- pyexpat.c 7 Jan 2005 07:03:18 -0000 2.67.2.2 +++ pyexpat.c 16 Oct 2005 05:24:04 -0000 2.67.2.3 @@ -417,6 +417,9 @@ { PyObject *result = STRING_CONV_FUNC(str); PyObject *value; + /* result can be NULL if the unicode conversion failed. 
*/ + if (!result) + return result; if (!self->intern) return result; value = PyDict_GetItem(self->intern, result); @@ -572,7 +575,9 @@ Py_DECREF(v); } } - args = Py_BuildValue("(NN)", string_intern(self, name), container); + args = string_intern(self, name); + if (args != NULL) + args = Py_BuildValue("(NN)", args, container); if (args == NULL) { Py_DECREF(container); return; @@ -1082,7 +1087,7 @@ = XML_GetInputContext(self->itself, &offset, &size); if (buffer != NULL) - result = PyString_FromStringAndSize(buffer + offset, size); + result = PyString_FromStringAndSize(buffer + offset, size - offset); else { result = Py_None; Py_INCREF(result); Index: readline.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/readline.c,v retrieving revision 2.51.2.2 retrieving revision 2.51.2.3 diff -u -d -r2.51.2.2 -r2.51.2.3 --- readline.c 7 Jan 2005 07:03:19 -0000 2.51.2.2 +++ readline.c 16 Oct 2005 05:24:04 -0000 2.51.2.3 @@ -1,9 +1,7 @@ /* This module makes GNU readline available to Python. It has ideas * contributed by Lee Busby, LLNL, and William Magro, Cornell Theory - * Center. The completer interface was inspired by Lele Gaifax. - * - * More recently, it was largely rewritten by Guido van Rossum who is - * now maintaining it. + * Center. The completer interface was inspired by Lele Gaifax. More + * recently, it was largely rewritten by Guido van Rossum. 
*/ /* Standard definitions */ @@ -161,8 +159,7 @@ /* Generic hook function setter */ static PyObject * -set_hook(const char *funcname, PyObject **hook_var, - PyThreadState **tstate, PyObject *args) +set_hook(const char *funcname, PyObject **hook_var, PyObject *args) { PyObject *function = Py_None; char buf[80]; @@ -172,14 +169,12 @@ if (function == Py_None) { Py_XDECREF(*hook_var); *hook_var = NULL; - *tstate = NULL; } else if (PyCallable_Check(function)) { PyObject *tmp = *hook_var; Py_INCREF(function); *hook_var = function; Py_XDECREF(tmp); - *tstate = PyThreadState_GET(); } else { PyOS_snprintf(buf, sizeof(buf), @@ -196,18 +191,15 @@ /* Exported functions to specify hook functions in Python */ static PyObject *startup_hook = NULL; -static PyThreadState *startup_hook_tstate = NULL; #ifdef HAVE_RL_PRE_INPUT_HOOK static PyObject *pre_input_hook = NULL; -static PyThreadState *pre_input_hook_tstate = NULL; #endif static PyObject * set_startup_hook(PyObject *self, PyObject *args) { - return set_hook("startup_hook", &startup_hook, - &startup_hook_tstate, args); + return set_hook("startup_hook", &startup_hook, args); } PyDoc_STRVAR(doc_set_startup_hook, @@ -224,8 +216,7 @@ static PyObject * set_pre_input_hook(PyObject *self, PyObject *args) { - return set_hook("pre_input_hook", &pre_input_hook, - &pre_input_hook_tstate, args); + return set_hook("pre_input_hook", &pre_input_hook, args); } PyDoc_STRVAR(doc_set_pre_input_hook, @@ -241,7 +232,6 @@ /* Exported function to specify a word completer in Python */ static PyObject *completer = NULL; -static PyThreadState *completer_tstate = NULL; static PyObject *begidx = NULL; static PyObject *endidx = NULL; @@ -303,6 +293,11 @@ if (!PyArg_ParseTuple(args, "i:remove_history", &entry_number)) return NULL; + if (entry_number < 0) { + PyErr_SetString(PyExc_ValueError, + "History index cannot be negative"); + return NULL; + } entry = remove_history(entry_number); if (!entry) { PyErr_Format(PyExc_ValueError, @@ -335,6 +330,11 @@ if 
(!PyArg_ParseTuple(args, "is:replace_history", &entry_number, &line)) { return NULL; } + if (entry_number < 0) { + PyErr_SetString(PyExc_ValueError, + "History index cannot be negative"); + return NULL; + } old_entry = replace_history_entry(entry_number, line, (void *)NULL); if (!old_entry) { PyErr_Format(PyExc_ValueError, @@ -395,7 +395,7 @@ static PyObject * set_completer(PyObject *self, PyObject *args) { - return set_hook("completer", &completer, &completer_tstate, args); + return set_hook("completer", &completer, args); } PyDoc_STRVAR(doc_set_completer, @@ -576,28 +576,34 @@ /* C function to call the Python hooks. */ static int -on_hook(PyObject *func, PyThreadState **tstate) +on_hook(PyObject *func) { int result = 0; if (func != NULL) { PyObject *r; - /* Note that readline is called with the interpreter - lock released! */ - PyEval_RestoreThread(*tstate); +#ifdef WITH_THREAD + PyGILState_STATE gilstate = PyGILState_Ensure(); +#endif r = PyObject_CallFunction(func, NULL); if (r == NULL) goto error; if (r == Py_None) result = 0; - else + else { result = PyInt_AsLong(r); + if (result == -1 && PyErr_Occurred()) + goto error; + } Py_DECREF(r); goto done; error: PyErr_Clear(); Py_XDECREF(r); done: - *tstate = PyEval_SaveThread(); +#ifdef WITH_THREAD + PyGILState_Release(gilstate); +#endif + return result; } return result; } @@ -605,14 +611,14 @@ static int on_startup_hook(void) { - return on_hook(startup_hook, &startup_hook_tstate); + return on_hook(startup_hook); } #ifdef HAVE_RL_PRE_INPUT_HOOK static int on_pre_input_hook(void) { - return on_hook(pre_input_hook, &pre_input_hook_tstate); + return on_hook(pre_input_hook); } #endif @@ -625,11 +631,9 @@ char *result = NULL; if (completer != NULL) { PyObject *r; - /* Note that readline is called with the interpreter - lock released! */ - PyEval_RestoreThread(completer_tstate); - /* Don't use the default filename completion if we - * have a custom completion function... 
*/ +#ifdef WITH_THREAD + PyGILState_STATE gilstate = PyGILState_Ensure(); +#endif rl_attempted_completion_over = 1; r = PyObject_CallFunction(completer, "si", text, state); if (r == NULL) @@ -649,7 +653,10 @@ PyErr_Clear(); Py_XDECREF(r); done: - completer_tstate = PyEval_SaveThread(); +#ifdef WITH_THREAD + PyGILState_Release(gilstate); +#endif + return result; } return result; } @@ -770,9 +777,13 @@ } else if (errno == EINTR) { int s; +#ifdef WITH_THREAD PyEval_RestoreThread(_PyOS_ReadlineTState); +#endif s = PyErr_CheckSignals(); +#ifdef WITH_THREAD PyEval_SaveThread(); +#endif if (s < 0) { rl_free_line_state(); rl_cleanup_after_signal(); Index: shamodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/shamodule.c,v retrieving revision 2.19.2.1 retrieving revision 2.19.2.2 diff -u -d -r2.19.2.1 -r2.19.2.2 --- shamodule.c 28 Apr 2003 17:19:20 -0000 2.19.2.1 +++ shamodule.c 16 Oct 2005 05:24:04 -0000 2.19.2.2 @@ -7,11 +7,16 @@ Andrew Kuchling (amk at amk.ca) Greg Stein (gstein at lyra.org) + + Copyright (C) 2005 Gregory P. Smith (greg at electricrain.com) + Licensed to PSF under a Contributor Agreement. 
+ */ /* SHA objects */ #include "Python.h" +#include "structmember.h" /* Endianness testing and definitions */ @@ -453,26 +458,78 @@ }; static PyObject * -SHA_getattr(PyObject *self, char *name) +SHA_get_block_size(PyObject *self, void *closure) { - if (strcmp(name, "blocksize")==0) - return PyInt_FromLong(1); - if (strcmp(name, "digest_size")==0 || strcmp(name, "digestsize")==0) - return PyInt_FromLong(20); + return PyInt_FromLong(SHA_BLOCKSIZE); +} - return Py_FindMethod(SHA_methods, self, name); +static PyObject * +SHA_get_digest_size(PyObject *self, void *closure) +{ + return PyInt_FromLong(SHA_DIGESTSIZE); +} + +static PyObject * +SHA_get_name(PyObject *self, void *closure) +{ + return PyString_FromStringAndSize("SHA1", 4); } +static PyGetSetDef SHA_getseters[] = { + {"digest_size", + (getter)SHA_get_digest_size, NULL, + NULL, + NULL}, + {"block_size", + (getter)SHA_get_block_size, NULL, + NULL, + NULL}, + {"name", + (getter)SHA_get_name, NULL, + NULL, + NULL}, + /* the old md5 and sha modules support 'digest_size' as in PEP 247. + * the old sha module also supported 'digestsize'. ugh. 
*/ + {"digestsize", + (getter)SHA_get_digest_size, NULL, + NULL, + NULL}, + {NULL} /* Sentinel */ +}; + static PyTypeObject SHAtype = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ - "sha.SHA", /*tp_name*/ + "_sha.sha", /*tp_name*/ sizeof(SHAobject), /*tp_size*/ 0, /*tp_itemsize*/ /* methods */ SHA_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ - SHA_getattr, /*tp_getattr*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + 0, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + SHA_methods, /* tp_methods */ + 0, /* tp_members */ + SHA_getseters, /* tp_getset */ }; @@ -516,7 +573,6 @@ static struct PyMethodDef SHA_functions[] = { {"new", (PyCFunction)SHA_new, METH_VARARGS|METH_KEYWORDS, SHA_new__doc__}, - {"sha", (PyCFunction)SHA_new, METH_VARARGS|METH_KEYWORDS, SHA_new__doc__}, {NULL, NULL} /* Sentinel */ }; @@ -526,12 +582,14 @@ #define insint(n,v) { PyModule_AddIntConstant(m,n,v); } PyMODINIT_FUNC -initsha(void) +init_sha(void) { PyObject *m; SHAtype.ob_type = &PyType_Type; - m = Py_InitModule("sha", SHA_functions); + if (PyType_Ready(&SHAtype) < 0) + return; + m = Py_InitModule("_sha", SHA_functions); /* Add some symbolic constants to the module */ insint("blocksize", 1); /* For future use, in case some hash Index: signalmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/signalmodule.c,v retrieving revision 2.70.2.2 retrieving revision 2.70.2.3 diff -u -d -r2.70.2.2 -r2.70.2.3 --- signalmodule.c 7 Jan 2005 07:03:20 -0000 2.70.2.2 +++ signalmodule.c 16 Oct 2005 05:24:04 -0000 2.70.2.3 @@ -669,5 +669,6 @@ PyEval_ReInitThreads(); main_thread = PyThread_get_thread_ident(); 
main_pid = getpid(); + _PyImport_ReInitLock(); #endif } Index: socketmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/socketmodule.c,v retrieving revision 1.229.2.2 retrieving revision 1.229.2.3 diff -u -d -r1.229.2.2 -r1.229.2.3 --- socketmodule.c 7 Jan 2005 07:03:21 -0000 1.229.2.2 +++ socketmodule.c 16 Oct 2005 05:24:04 -0000 1.229.2.3 @@ -140,9 +140,14 @@ # define USE_GETHOSTBYNAME_LOCK #endif +/* To use __FreeBSD_version */ +#ifdef HAVE_SYS_PARAM_H +#include +#endif /* On systems on which getaddrinfo() is believed to not be thread-safe, (this includes the getaddrinfo emulation) protect access with a lock. */ -#if defined(WITH_THREAD) && (defined(__APPLE__) || defined(__FreeBSD__) || \ +#if defined(WITH_THREAD) && (defined(__APPLE__) || \ + (defined(__FreeBSD__) && __FreeBSD_version+0 < 503000) || \ defined(__OpenBSD__) || defined(__NetBSD__) || !defined(HAVE_GETADDRINFO)) #define USE_GETADDRINFO_LOCK #endif @@ -1344,7 +1349,7 @@ static PyObject * sock_accept(PySocketSockObject *s) { - char addrbuf[256]; + sock_addr_t addrbuf; SOCKET_T newfd; socklen_t addrlen; PyObject *sock = NULL; @@ -1354,7 +1359,7 @@ if (!getsockaddrlen(s, &addrlen)) return NULL; - memset(addrbuf, 0, addrlen); + memset(&addrbuf, 0, addrlen); #ifdef MS_WINDOWS newfd = INVALID_SOCKET; @@ -1365,7 +1370,7 @@ Py_BEGIN_ALLOW_THREADS timeout = internal_select(s, 0); if (!timeout) - newfd = accept(s->sock_fd, (struct sockaddr *) addrbuf, + newfd = accept(s->sock_fd, (struct sockaddr *) &addrbuf, &addrlen); Py_END_ALLOW_THREADS @@ -1392,7 +1397,7 @@ SOCKETCLOSE(newfd); goto finally; } - addr = makesockaddr(s->sock_fd, (struct sockaddr *)addrbuf, + addr = makesockaddr(s->sock_fd, (struct sockaddr *) &addrbuf, addrlen, s->sock_proto); if (addr == NULL) goto finally; @@ -1865,19 +1870,19 @@ static PyObject * sock_getsockname(PySocketSockObject *s) { - char addrbuf[256]; + sock_addr_t addrbuf; int res; socklen_t addrlen; if 
(!getsockaddrlen(s, &addrlen)) return NULL; - memset(addrbuf, 0, addrlen); + memset(&addrbuf, 0, addrlen); Py_BEGIN_ALLOW_THREADS - res = getsockname(s->sock_fd, (struct sockaddr *) addrbuf, &addrlen); + res = getsockname(s->sock_fd, (struct sockaddr *) &addrbuf, &addrlen); Py_END_ALLOW_THREADS if (res < 0) return s->errorhandler(); - return makesockaddr(s->sock_fd, (struct sockaddr *) addrbuf, addrlen, + return makesockaddr(s->sock_fd, (struct sockaddr *) &addrbuf, addrlen, s->sock_proto); } @@ -1894,19 +1899,19 @@ static PyObject * sock_getpeername(PySocketSockObject *s) { - char addrbuf[256]; + sock_addr_t addrbuf; int res; socklen_t addrlen; if (!getsockaddrlen(s, &addrlen)) return NULL; - memset(addrbuf, 0, addrlen); + memset(&addrbuf, 0, addrlen); Py_BEGIN_ALLOW_THREADS - res = getpeername(s->sock_fd, (struct sockaddr *) addrbuf, &addrlen); + res = getpeername(s->sock_fd, (struct sockaddr *) &addrbuf, &addrlen); Py_END_ALLOW_THREADS if (res < 0) return s->errorhandler(); - return makesockaddr(s->sock_fd, (struct sockaddr *) addrbuf, addrlen, + return makesockaddr(s->sock_fd, (struct sockaddr *) &addrbuf, addrlen, s->sock_proto); } @@ -2115,7 +2120,7 @@ static PyObject * sock_recvfrom(PySocketSockObject *s, PyObject *args) { - char addrbuf[256]; + sock_addr_t addrbuf; PyObject *buf = NULL; PyObject *addr = NULL; PyObject *ret = NULL; @@ -2132,18 +2137,18 @@ return NULL; Py_BEGIN_ALLOW_THREADS - memset(addrbuf, 0, addrlen); + memset(&addrbuf, 0, addrlen); timeout = internal_select(s, 0); if (!timeout) n = recvfrom(s->sock_fd, PyString_AS_STRING(buf), len, flags, #ifndef MS_WINDOWS #if defined(PYOS_OS2) && !defined(PYCC_GCC) - (struct sockaddr *)addrbuf, &addrlen + (struct sockaddr *) &addrbuf, &addrlen #else - (void *)addrbuf, &addrlen + (void *) &addrbuf, &addrlen #endif #else - (struct sockaddr *)addrbuf, &addrlen + (struct sockaddr *) &addrbuf, &addrlen #endif ); Py_END_ALLOW_THREADS @@ -2161,7 +2166,7 @@ if (n != len && _PyString_Resize(&buf, n) < 0) return 
NULL; - if (!(addr = makesockaddr(s->sock_fd, (struct sockaddr *)addrbuf, + if (!(addr = makesockaddr(s->sock_fd, (struct sockaddr *) &addrbuf, addrlen, s->sock_proto))) goto finally; @@ -2589,11 +2594,7 @@ socket_gethostbyname(PyObject *self, PyObject *args) { char *name; -#ifdef ENABLE_IPV6 - struct sockaddr_storage addrbuf; -#else - struct sockaddr_in addrbuf; -#endif + sock_addr_t addrbuf; if (!PyArg_ParseTuple(args, "s:gethostbyname", &name)) return NULL; @@ -3238,14 +3239,19 @@ return NULL; #else /* ! HAVE_INET_ATON */ - /* XXX Problem here: inet_aton('255.255.255.255') raises - an exception while it should be a valid address. */ - packed_addr = inet_addr(ip_addr); + /* special-case this address as inet_addr might return INADDR_NONE + * for this */ + if (strcmp(ip_addr, "255.255.255.255") == 0) { + packed_addr = 0xFFFFFFFF; + } else { + + packed_addr = inet_addr(ip_addr); - if (packed_addr == INADDR_NONE) { /* invalid address */ - PyErr_SetString(socket_error, - "illegal IP address string passed to inet_aton"); - return NULL; + if (packed_addr == INADDR_NONE) { /* invalid address */ + PyErr_SetString(socket_error, + "illegal IP address string passed to inet_aton"); + return NULL; + } } return PyString_FromStringAndSize((char *) &packed_addr, sizeof(packed_addr)); Index: socketmodule.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/socketmodule.h,v retrieving revision 1.8.2.1 retrieving revision 1.8.2.2 diff -u -d -r1.8.2.1 -r1.8.2.2 --- socketmodule.h 7 Jan 2005 07:03:22 -0000 1.8.2.1 +++ socketmodule.h 16 Oct 2005 05:24:04 -0000 1.8.2.2 @@ -72,6 +72,26 @@ # define SIZEOF_SOCKET_T SIZEOF_INT #endif +/* Socket address */ +typedef union sock_addr { + struct sockaddr_in in; +#ifdef AF_UNIX + struct sockaddr_un un; +#endif +#ifdef ENABLE_IPV6 + struct sockaddr_in6 in6; + struct sockaddr_storage storage; +#endif +#ifdef HAVE_BLUETOOTH_BLUETOOTH_H + struct sockaddr_l2 bt_l2; + struct sockaddr_rc 
bt_rc; + struct sockaddr_sco bt_sco; +#endif +#ifdef HAVE_NETPACKET_PACKET_H + struct sockaddr_ll ll; +#endif +} sock_addr_t; + /* The object holding a socket. It holds some extra information, like the address family, which is used to decode socket address arguments properly. */ @@ -82,24 +102,7 @@ int sock_family; /* Address family, e.g., AF_INET */ int sock_type; /* Socket type, e.g., SOCK_STREAM */ int sock_proto; /* Protocol type, usually 0 */ - union sock_addr { - struct sockaddr_in in; -#ifdef AF_UNIX - struct sockaddr_un un; -#endif -#ifdef ENABLE_IPV6 - struct sockaddr_in6 in6; - struct sockaddr_storage storage; -#endif -#ifdef HAVE_BLUETOOTH_BLUETOOTH_H - struct sockaddr_l2 bt_l2; - struct sockaddr_rc bt_rc; - struct sockaddr_sco bt_sco; -#endif -#ifdef HAVE_NETPACKET_PACKET_H - struct sockaddr_ll ll; -#endif - } sock_addr; + sock_addr_t sock_addr; /* Socket address */ PyObject *(*errorhandler)(void); /* Error handler; checks errno, returns NULL and sets a Python exception */ Index: structmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/structmodule.c,v retrieving revision 2.55.2.2 retrieving revision 2.55.2.3 diff -u -d -r2.55.2.2 -r2.55.2.3 --- structmodule.c 7 Jan 2005 07:03:23 -0000 2.55.2.2 +++ structmodule.c 16 Oct 2005 05:24:04 -0000 2.55.2.3 @@ -959,7 +959,7 @@ s = fmt; size = 0; while ((c = *s++) != '\0') { - if (isspace((int)c)) + if (isspace(Py_CHARMASK(c))) continue; if ('0' <= c && c <= '9') { num = c - '0'; @@ -1059,7 +1059,7 @@ res = restart = PyString_AsString(result); while ((c = *s++) != '\0') { - if (isspace((int)c)) + if (isspace(Py_CHARMASK(c))) continue; if ('0' <= c && c <= '9') { num = c - '0'; @@ -1191,7 +1191,7 @@ str = start; s = fmt; while ((c = *s++) != '\0') { - if (isspace((int)c)) + if (isspace(Py_CHARMASK(c))) continue; if ('0' <= c && c <= '9') { num = c - '0'; Index: threadmodule.c 
=================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/threadmodule.c,v retrieving revision 2.50.2.2 retrieving revision 2.50.2.3 diff -u -d -r2.50.2.2 -r2.50.2.3 --- threadmodule.c 7 Jan 2005 07:03:24 -0000 2.50.2.2 +++ threadmodule.c 16 Oct 2005 05:24:04 -0000 2.50.2.3 @@ -63,12 +63,7 @@ i = PyThread_acquire_lock(self->lock_lock, i); Py_END_ALLOW_THREADS - if (args == NULL) { - Py_INCREF(Py_None); - return Py_None; - } - else - return PyBool_FromLong((long)i); + return PyBool_FromLong((long)i); } PyDoc_STRVAR(acquire_doc, @@ -163,11 +158,11 @@ #include "structmember.h" typedef struct { - PyObject_HEAD - PyObject *key; - PyObject *args; - PyObject *kw; - PyObject *dict; + PyObject_HEAD + PyObject *key; + PyObject *args; + PyObject *kw; + PyObject *dict; } localobject; static PyTypeObject localtype; @@ -175,91 +170,87 @@ static PyObject * local_new(PyTypeObject *type, PyObject *args, PyObject *kw) { - localobject *self; - PyObject *tdict; + localobject *self; + PyObject *tdict; - if (type->tp_init == PyBaseObject_Type.tp_init - && ((args && PyObject_IsTrue(args)) - || - (kw && PyObject_IsTrue(kw)) - ) - ) { - PyErr_SetString(PyExc_TypeError, - "Initialization arguments are not supported"); - return NULL; - } + if (type->tp_init == PyBaseObject_Type.tp_init + && ((args && PyObject_IsTrue(args)) + || (kw && PyObject_IsTrue(kw)))) { + PyErr_SetString(PyExc_TypeError, + "Initialization arguments are not supported"); + return NULL; + } - self = (localobject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; + self = (localobject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; - Py_XINCREF(args); - self->args = args; - Py_XINCREF(kw); - self->kw = kw; - self->dict = NULL; /* making sure */ - self->key = PyString_FromFormat("thread.local.%p", self); - if (self->key == NULL) - goto err; + Py_XINCREF(args); + self->args = args; + Py_XINCREF(kw); + self->kw = kw; + self->dict = NULL; /* 
making sure */ + self->key = PyString_FromFormat("thread.local.%p", self); + if (self->key == NULL) + goto err; - self->dict = PyDict_New(); - if (self->dict == NULL) - goto err; + self->dict = PyDict_New(); + if (self->dict == NULL) + goto err; - tdict = PyThreadState_GetDict(); - if (tdict == NULL) { - PyErr_SetString(PyExc_SystemError, - "Couldn't get thread-state dictionary"); - goto err; - } + tdict = PyThreadState_GetDict(); + if (tdict == NULL) { + PyErr_SetString(PyExc_SystemError, + "Couldn't get thread-state dictionary"); + goto err; + } - if (PyDict_SetItem(tdict, self->key, self->dict) < 0) - goto err; - - return (PyObject *)self; + if (PyDict_SetItem(tdict, self->key, self->dict) < 0) + goto err; - err: - Py_DECREF(self); - return NULL; + return (PyObject *)self; + + err: + Py_DECREF(self); + return NULL; } static int local_traverse(localobject *self, visitproc visit, void *arg) { - Py_VISIT(self->args); - Py_VISIT(self->kw); - Py_VISIT(self->dict); + Py_VISIT(self->args); + Py_VISIT(self->kw); + Py_VISIT(self->dict); return 0; } static int local_clear(localobject *self) { - Py_CLEAR(self->key); - Py_CLEAR(self->args); - Py_CLEAR(self->kw); - Py_CLEAR(self->dict); - return 0; + Py_CLEAR(self->key); + Py_CLEAR(self->args); + Py_CLEAR(self->kw); + Py_CLEAR(self->dict); + return 0; } static void local_dealloc(localobject *self) { - PyThreadState *tstate; - if (self->key - && (tstate = PyThreadState_Get()) - && tstate->interp) { - for(tstate = PyInterpreterState_ThreadHead(tstate->interp); - tstate; - tstate = PyThreadState_Next(tstate) - ) - if (tstate->dict && - PyDict_GetItem(tstate->dict, self->key)) - PyDict_DelItem(tstate->dict, self->key); - } + PyThreadState *tstate; + if (self->key + && (tstate = PyThreadState_Get()) + && tstate->interp) { + for(tstate = PyInterpreterState_ThreadHead(tstate->interp); + tstate; + tstate = PyThreadState_Next(tstate)) + if (tstate->dict && + PyDict_GetItem(tstate->dict, self->key)) + PyDict_DelItem(tstate->dict, 
self->key); + } - local_clear(self); - self->ob_type->tp_free((PyObject*)self); + local_clear(self); + self->ob_type->tp_free((PyObject*)self); } static PyObject * @@ -268,48 +259,47 @@ PyObject *tdict, *ldict; tdict = PyThreadState_GetDict(); - if (tdict == NULL) { - PyErr_SetString(PyExc_SystemError, - "Couldn't get thread-state dictionary"); - return NULL; - } + if (tdict == NULL) { + PyErr_SetString(PyExc_SystemError, + "Couldn't get thread-state dictionary"); + return NULL; + } - ldict = PyDict_GetItem(tdict, self->key); - if (ldict == NULL) { - ldict = PyDict_New(); /* we own ldict */ + ldict = PyDict_GetItem(tdict, self->key); + if (ldict == NULL) { + ldict = PyDict_New(); /* we own ldict */ - if (ldict == NULL) - return NULL; - else { - int i = PyDict_SetItem(tdict, self->key, ldict); - Py_DECREF(ldict); /* now ldict is borowed */ - if (i < 0) - return NULL; - } + if (ldict == NULL) + return NULL; + else { + int i = PyDict_SetItem(tdict, self->key, ldict); + Py_DECREF(ldict); /* now ldict is borowed */ + if (i < 0) + return NULL; + } - Py_CLEAR(self->dict); - Py_INCREF(ldict); - self->dict = ldict; /* still borrowed */ + Py_CLEAR(self->dict); + Py_INCREF(ldict); + self->dict = ldict; /* still borrowed */ - if (self->ob_type->tp_init != PyBaseObject_Type.tp_init && - self->ob_type->tp_init((PyObject*)self, - self->args, self->kw) < 0 - ) { - /* we need to get rid of ldict from thread so - we create a new one the next time we do an attr - acces */ - PyDict_DelItem(tdict, self->key); - return NULL; - } - - } - else if (self->dict != ldict) { - Py_CLEAR(self->dict); - Py_INCREF(ldict); - self->dict = ldict; - } + if (self->ob_type->tp_init != PyBaseObject_Type.tp_init && + self->ob_type->tp_init((PyObject*)self, + self->args, self->kw) < 0) { + /* we need to get rid of ldict from thread so + we create a new one the next time we do an attr + acces */ + PyDict_DelItem(tdict, self->key); + return NULL; + } + + } + else if (self->dict != ldict) { + 
Py_CLEAR(self->dict); + Py_INCREF(ldict); + self->dict = ldict; + } - return ldict; + return ldict; } static PyObject * @@ -317,54 +307,52 @@ { PyObject *ldict, *value; - ldict = _ldict(self); - if (ldict == NULL) - return NULL; + ldict = _ldict(self); + if (ldict == NULL) + return NULL; - if (self->ob_type != &localtype) - /* use generic lookup for subtypes */ - return PyObject_GenericGetAttr((PyObject *)self, name); + if (self->ob_type != &localtype) + /* use generic lookup for subtypes */ + return PyObject_GenericGetAttr((PyObject *)self, name); - /* Optimization: just look in dict ourselves */ - value = PyDict_GetItem(ldict, name); - if (value == NULL) - /* Fall back on generic to get __class__ and __dict__ */ - return PyObject_GenericGetAttr((PyObject *)self, name); + /* Optimization: just look in dict ourselves */ + value = PyDict_GetItem(ldict, name); + if (value == NULL) + /* Fall back on generic to get __class__ and __dict__ */ + return PyObject_GenericGetAttr((PyObject *)self, name); - Py_INCREF(value); - return value; + Py_INCREF(value); + return value; } static int local_setattro(localobject *self, PyObject *name, PyObject *v) { PyObject *ldict; - - ldict = _ldict(self); - if (ldict == NULL) - return -1; + + ldict = _ldict(self); + if (ldict == NULL) + return -1; - return PyObject_GenericSetAttr((PyObject *)self, name, v); + return PyObject_GenericSetAttr((PyObject *)self, name, v); } static PyObject * local_getdict(localobject *self, void *closure) { - if (self->dict == NULL) { - PyErr_SetString(PyExc_AttributeError, "__dict__"); - return NULL; - } + if (self->dict == NULL) { + PyErr_SetString(PyExc_AttributeError, "__dict__"); + return NULL; + } - Py_INCREF(self->dict); - return self->dict; + Py_INCREF(self->dict); + return self->dict; } static PyGetSetDef local_getset[] = { - {"__dict__", - (getter)local_getdict, (setter)0, - "Local-data dictionary", - NULL}, - {NULL} /* Sentinel */ + {"__dict__", (getter)local_getdict, (setter)NULL, + "Local-data 
dictionary", NULL}, + {NULL} /* Sentinel */ }; static PyTypeObject localtype = { @@ -385,28 +373,28 @@ /* tp_hash */ (hashfunc)0, /* tp_call */ (ternaryfunc)0, /* tp_str */ (reprfunc)0, - /* tp_getattro */ (getattrofunc)local_getattro, - /* tp_setattro */ (setattrofunc)local_setattro, - /* tp_as_buffer */ 0, - /* tp_flags */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + /* tp_getattro */ (getattrofunc)local_getattro, + /* tp_setattro */ (setattrofunc)local_setattro, + /* tp_as_buffer */ 0, + /* tp_flags */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_doc */ "Thread-local data", - /* tp_traverse */ (traverseproc)local_traverse, - /* tp_clear */ (inquiry)local_clear, - /* tp_richcompare */ (richcmpfunc)0, - /* tp_weaklistoffset */ (long)0, - /* tp_iter */ (getiterfunc)0, - /* tp_iternext */ (iternextfunc)0, - /* tp_methods */ 0, - /* tp_members */ 0, - /* tp_getset */ local_getset, - /* tp_base */ 0, - /* tp_dict */ 0, /* internal use */ - /* tp_descr_get */ (descrgetfunc)0, - /* tp_descr_set */ (descrsetfunc)0, - /* tp_dictoffset */ offsetof(localobject, dict), - /* tp_init */ (initproc)0, - /* tp_alloc */ (allocfunc)0, - /* tp_new */ (newfunc)local_new, + /* tp_traverse */ (traverseproc)local_traverse, + /* tp_clear */ (inquiry)local_clear, + /* tp_richcompare */ (richcmpfunc)0, + /* tp_weaklistoffset */ (long)0, + /* tp_iter */ (getiterfunc)0, + /* tp_iternext */ (iternextfunc)0, + /* tp_methods */ 0, + /* tp_members */ 0, + /* tp_getset */ local_getset, + /* tp_base */ 0, + /* tp_dict */ 0, /* internal use */ + /* tp_descr_get */ (descrgetfunc)0, + /* tp_descr_set */ (descrsetfunc)0, + /* tp_dictoffset */ offsetof(localobject, dict), + /* tp_init */ (initproc)0, + /* tp_alloc */ (allocfunc)0, + /* tp_new */ (newfunc)local_new, /* tp_free */ 0, /* Low-level free-mem routine */ /* tp_is_gc */ (inquiry)0, /* For PyObject_IS_GC */ }; @@ -425,10 +413,12 @@ t_bootstrap(void *boot_raw) { struct bootstate *boot = (struct bootstate *) boot_raw; - PyGILState_STATE gstate; 
+ PyThreadState *tstate; PyObject *res; - gstate = PyGILState_Ensure(); + tstate = PyThreadState_New(boot->interp); + + PyEval_AcquireThread(tstate); res = PyEval_CallObjectWithKeywords( boot->func, boot->args, boot->keyw); if (res == NULL) { @@ -453,7 +443,8 @@ Py_DECREF(boot->args); Py_XDECREF(boot->keyw); PyMem_DEL(boot_raw); - PyGILState_Release(gstate); + PyThreadState_Clear(tstate); + PyThreadState_DeleteCurrent(); PyThread_exit_thread(); } @@ -494,7 +485,7 @@ PyEval_InitThreads(); /* Start the interpreter's thread-awareness */ ident = PyThread_start_new_thread(t_bootstrap, (void*) boot); if (ident == -1) { - PyErr_SetString(ThreadError, "can't start new thread\n"); + PyErr_SetString(ThreadError, "can't start new thread"); Py_DECREF(func); Py_DECREF(args); Py_XDECREF(keyw); @@ -640,10 +631,10 @@ initthread(void) { PyObject *m, *d; - - /* Initialize types: */ - if (PyType_Ready(&localtype) < 0) - return; + + /* Initialize types: */ + if (PyType_Ready(&localtype) < 0) + return; /* Create the module and add the functions */ m = Py_InitModule3("thread", thread_methods, thread_doc); @@ -656,8 +647,9 @@ Py_INCREF(&Locktype); PyDict_SetItemString(d, "LockType", (PyObject *)&Locktype); - if (PyModule_AddObject(m, "_local", (PyObject *)&localtype) < 0) - return; + Py_INCREF(&localtype); + if (PyModule_AddObject(m, "_local", (PyObject *)&localtype) < 0) + return; /* Initialize the C thread library */ PyThread_init_thread(); Index: unicodedata.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/unicodedata.c,v retrieving revision 2.18.2.2 retrieving revision 2.18.2.3 diff -u -d -r2.18.2.2 -r2.18.2.3 --- unicodedata.c 7 Jan 2005 07:03:24 -0000 2.18.2.2 +++ unicodedata.c 16 Oct 2005 05:24:04 -0000 2.18.2.3 @@ -53,6 +53,13 @@ /* --- Module API --------------------------------------------------------- */ +PyDoc_STRVAR(unicodedata_decimal__doc__, +"decimal(unichr[, default])\n\ +\n\ +Returns the decimal 
value assigned to the Unicode character unichr\n\ +as integer. If no such value is defined, default is returned, or, if\n\ +not given, ValueError is raised."); + static PyObject * unicodedata_decimal(PyObject *self, PyObject *args) { @@ -82,6 +89,13 @@ return PyInt_FromLong(rc); } +PyDoc_STRVAR(unicodedata_digit__doc__, +"digit(unichr[, default])\n\ +\n\ +Returns the digit value assigned to the Unicode character unichr as\n\ +integer. If no such value is defined, default is returned, or, if\n\ +not given, ValueError is raised."); + static PyObject * unicodedata_digit(PyObject *self, PyObject *args) { @@ -110,6 +124,13 @@ return PyInt_FromLong(rc); } +PyDoc_STRVAR(unicodedata_numeric__doc__, +"numeric(unichr[, default])\n\ +\n\ +Returns the numeric value assigned to the Unicode character unichr\n\ +as float. If no such value is defined, default is returned, or, if\n\ +not given, ValueError is raised."); + static PyObject * unicodedata_numeric(PyObject *self, PyObject *args) { @@ -138,6 +159,12 @@ return PyFloat_FromDouble(rc); } +PyDoc_STRVAR(unicodedata_category__doc__, +"category(unichr)\n\ +\n\ +Returns the general category assigned to the Unicode character\n\ +unichr as string."); + static PyObject * unicodedata_category(PyObject *self, PyObject *args) { @@ -156,6 +183,13 @@ return PyString_FromString(_PyUnicode_CategoryNames[index]); } +PyDoc_STRVAR(unicodedata_bidirectional__doc__, +"bidirectional(unichr)\n\ +\n\ +Returns the bidirectional category assigned to the Unicode character\n\ +unichr as string. If no such value is defined, an empty string is\n\ +returned."); + static PyObject * unicodedata_bidirectional(PyObject *self, PyObject *args) { @@ -174,6 +208,13 @@ return PyString_FromString(_PyUnicode_BidirectionalNames[index]); } +PyDoc_STRVAR(unicodedata_combining__doc__, +"combining(unichr)\n\ +\n\ +Returns the canonical combining class assigned to the Unicode\n\ +character unichr as integer. 
Returns 0 if no combining class is\n\ +defined."); + static PyObject * unicodedata_combining(PyObject *self, PyObject *args) { @@ -190,6 +231,13 @@ return PyInt_FromLong((int) _getrecord(v)->combining); } +PyDoc_STRVAR(unicodedata_mirrored__doc__, +"mirrored(unichr)\n\ +\n\ +Returns the mirrored property assigned to the Unicode character\n\ +unichr as integer. Returns 1 if the character has been identified as\n\ +a \"mirrored\" character in bidirectional text, 0 otherwise."); + static PyObject * unicodedata_mirrored(PyObject *self, PyObject *args) { @@ -206,6 +254,12 @@ return PyInt_FromLong((int) _getrecord(v)->mirrored); } +PyDoc_STRVAR(unicodedata_east_asian_width__doc__, +"east_asian_width(unichr)\n\ +\n\ +Returns the east asian width assigned to the Unicode character\n\ +unichr as string."); + static PyObject * unicodedata_east_asian_width(PyObject *self, PyObject *args) { @@ -224,6 +278,13 @@ return PyString_FromString(_PyUnicode_EastAsianWidthNames[index]); } +PyDoc_STRVAR(unicodedata_decomposition__doc__, +"decomposition(unichr)\n\ +\n\ +Returns the character decomposition mapping assigned to the Unicode\n\ +character unichr as string. An empty string is returned in case no\n\ +such mapping is defined."); + static PyObject * unicodedata_decomposition(PyObject *self, PyObject *args) { @@ -525,6 +586,12 @@ return result; } +PyDoc_STRVAR(unicodedata_normalize__doc__, +"normalize(form, unistr)\n\ +\n\ +Return the normal form 'form' for the Unicode string unistr. 
Valid\n\ +values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'."); + static PyObject* unicodedata_normalize(PyObject *self, PyObject *args) { @@ -753,7 +820,7 @@ pos += len; find_syllable(pos, &len, &T, TCount, 2); pos += len; - if (V != -1 && V != -1 && T != -1 && pos-name == namelen) { + if (L != -1 && V != -1 && T != -1 && pos-name == namelen) { *code = SBase + (L*VCount+V)*TCount + T; return 1; } @@ -826,6 +893,12 @@ /* -------------------------------------------------------------------- */ /* Python bindings */ +PyDoc_STRVAR(unicodedata_name__doc__, +"name(unichr[, default])\n\ +Returns the name assigned to the Unicode character unichr as a\n\ +string. If no name is defined, default is returned, or, if not\n\ +given, ValueError is raised."); + static PyObject * unicodedata_name(PyObject* self, PyObject* args) { @@ -857,6 +930,13 @@ return Py_BuildValue("s", name); } +PyDoc_STRVAR(unicodedata_lookup__doc__, +"lookup(name)\n\ +\n\ +Look up character by name. If a character with the\n\ +given name is found, return the corresponding Unicode\n\ +character. If not found, KeyError is raised."); + static PyObject * unicodedata_lookup(PyObject* self, PyObject* args) { @@ -884,22 +964,37 @@ /* XXX Add doc strings. 
*/ static PyMethodDef unicodedata_functions[] = { - {"decimal", unicodedata_decimal, METH_VARARGS}, - {"digit", unicodedata_digit, METH_VARARGS}, - {"numeric", unicodedata_numeric, METH_VARARGS}, - {"category", unicodedata_category, METH_VARARGS}, - {"bidirectional", unicodedata_bidirectional, METH_VARARGS}, - {"combining", unicodedata_combining, METH_VARARGS}, - {"mirrored", unicodedata_mirrored, METH_VARARGS}, - {"east_asian_width", unicodedata_east_asian_width, METH_VARARGS}, - {"decomposition",unicodedata_decomposition, METH_VARARGS}, - {"name", unicodedata_name, METH_VARARGS}, - {"lookup", unicodedata_lookup, METH_VARARGS}, - {"normalize", unicodedata_normalize, METH_VARARGS}, + {"decimal", unicodedata_decimal, METH_VARARGS, unicodedata_decimal__doc__}, + {"digit", unicodedata_digit, METH_VARARGS, unicodedata_digit__doc__}, + {"numeric", unicodedata_numeric, METH_VARARGS, unicodedata_numeric__doc__}, + {"category", unicodedata_category, METH_VARARGS, + unicodedata_category__doc__}, + {"bidirectional", unicodedata_bidirectional, METH_VARARGS, + unicodedata_bidirectional__doc__}, + {"combining", unicodedata_combining, METH_VARARGS, + unicodedata_combining__doc__}, + {"mirrored", unicodedata_mirrored, METH_VARARGS, + unicodedata_mirrored__doc__}, + {"east_asian_width", unicodedata_east_asian_width, METH_VARARGS, + unicodedata_east_asian_width__doc__}, + {"decomposition", unicodedata_decomposition, METH_VARARGS, + unicodedata_decomposition__doc__}, + {"name", unicodedata_name, METH_VARARGS, unicodedata_name__doc__}, + {"lookup", unicodedata_lookup, METH_VARARGS, unicodedata_lookup__doc__}, + {"normalize", unicodedata_normalize, METH_VARARGS, + unicodedata_normalize__doc__}, {NULL, NULL} /* sentinel */ }; -PyDoc_STRVAR(unicodedata_docstring, "unicode character database"); +PyDoc_STRVAR(unicodedata_docstring, +"This module provides access to the Unicode Character Database which\n\ +defines character properties for all Unicode characters. 
The data in\n\ +this database is based on the UnicodeData.txt file version\n\ +3.2.0 which is publically available from ftp://ftp.unicode.org/.\n\ +\n\ +The module uses the same names and symbols as defined by the\n\ +UnicodeData File Format 3.2.0 (see\n\ +http://www.unicode.org/Public/3.2-Update/UnicodeData-3.2.0.html)."); PyMODINIT_FUNC initunicodedata(void) Index: zipimport.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/zipimport.c,v retrieving revision 1.13.4.2 retrieving revision 1.13.4.3 diff -u -d -r1.13.4.2 -r1.13.4.3 --- zipimport.c 7 Jan 2005 07:03:32 -0000 1.13.4.2 +++ zipimport.c 16 Oct 2005 05:24:04 -0000 1.13.4.3 @@ -65,6 +65,9 @@ char *path, *p, *prefix, buf[MAXPATHLEN+2]; int len; + if (!_PyArg_NoKeywords("zipimporter()", kwds)) + return -1; + if (!PyArg_ParseTuple(args, "s:zipimporter", &path)) return -1; From jhylton at users.sourceforge.net Sun Oct 16 07:24:41 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Sun, 16 Oct 2005 07:24:41 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test bad_coding.py, NONE, 1.1.2.2 test_cmd_line.py, NONE, 1.2.2.2 test_coding.py, NONE, 1.1.2.2 test_float.py, NONE, 1.2.2.2 test_functional.py, NONE, 1.4.2.2 test_hashlib.py, NONE, 1.1.2.2 test_hashlib_speed.py, NONE, 1.2.2.2 test_xdrlib.py, NONE, 1.2.2.2 inspect_fodder2.py, 1.2.2.1, 1.2.2.2 list_tests.py, 1.5.4.1, 1.5.4.2 regrtest.py, 1.87.2.2, 1.87.2.3 seq_tests.py, 1.3.4.1, 1.3.4.2 string_tests.py, 1.18.2.2, 1.18.2.3 test_StringIO.py, 1.13.2.2, 1.13.2.3 test__locale.py, 1.8.4.1, 1.8.4.2 test_al.py, 1.6.26.1, 1.6.26.2 test_array.py, 1.16.2.2, 1.16.2.3 test_asynchat.py, 1.4.24.1, 1.4.24.2 test_base64.py, 1.2.2.2, 1.2.2.3 test_binop.py, 1.6.14.1, 1.6.14.2 test_bisect.py, 1.1.26.2, 1.1.26.3 test_builtin.py, 1.1.36.2, 1.1.36.3 test_bz2.py, 1.12.8.2, 1.12.8.3 test_cd.py, 1.5.26.1, 1.5.26.2 test_cl.py, 1.5.26.1, 1.5.26.2 test_class.py, 1.8.2.2, 1.8.2.3 
test_codeccallbacks.py, 1.10.8.2, 1.10.8.3 test_codecs.py, 1.2.14.2, 1.2.14.3 test_compiler.py, 1.10.4.1, 1.10.4.2 test_complex.py, 1.8.2.2, 1.8.2.3 test_cookie.py, 1.11.14.2, 1.11.14.3 test_cookielib.py, 1.1.6.1, 1.1.6.2 test_csv.py, 1.5.4.2, 1.5.4.3 test_curses.py, 1.1.10.2, 1.1.10.3 test_datetime.py, 1.41.4.2, 1.41.4.3 test_decimal.py, 1.16.2.1, 1.16.2.2 test_deque.py, 1.18.4.1, 1.18.4.2 test_descr.py, 1.144.2.2, 1.144.2.3 test_descrtut.py, 1.13.2.2, 1.13.2.3 test_doctest.py, 1.4.14.3, 1.4.14.4 test_doctest2.py, 1.3.12.2, 1.3.12.3 test_dumbdbm.py, 1.6.2.2, 1.6.2.3 test_enumerate.py, 1.2.2.2, 1.2.2.3 test_fcntl.py, 1.23.2.2, 1.23.2.3 test_file.py, 1.9.2.2, 1.9.2.3 test_funcattrs.py, 1.11.10.2, 1.11.10.3 test_generators.py, 1.34.2.2, 1.34.2.3 test_genexps.py, 1.7.4.2, 1.7.4.3 test_getargs2.py, 1.4.4.2, 1.4.4.3 test_glob.py, 1.3.14.2, 1.3.14.3 test_grammar.py, 1.40.8.4, 1.40.8.5 test_gzip.py, 1.9.2.2, 1.9.2.3 test_hmac.py, 1.4.2.2, 1.4.2.3 test_inspect.py, 1.7.12.2, 1.7.12.3 test_ioctl.py, 1.2.6.1, 1.2.6.2 test_isinstance.py, 1.3.2.2, 1.3.2.3 test_iterlen.py, 1.2.6.1, 1.2.6.2 test_itertools.py, 1.5.6.2, 1.5.6.3 test_locale.py, 1.3.20.2, 1.3.20.3 test_long.py, 1.16.2.2, 1.16.2.3 test_macfs.py, 1.7.4.1, 1.7.4.2 test_marshal.py, 1.1.16.2, 1.1.16.3 test_minidom.py, 1.32.2.2, 1.32.2.3 test_mmap.py, 1.22.2.1, 1.22.2.2 test_mutants.py, 1.6.22.1, 1.6.22.2 test_new.py, 1.14.8.2, 1.14.8.3 test_operator.py, 1.8.14.2, 1.8.14.3 test_os.py, 1.11.2.2, 1.11.2.3 test_ossaudiodev.py, 1.4.8.2, 1.4.8.3 test_parser.py, 1.11.2.3, 1.11.2.4 test_popen2.py, 1.6.2.2, 1.6.2.3 test_posix.py, 1.5.6.2, 1.5.6.3 test_posixpath.py, 1.4.26.2, 1.4.26.3 test_profile.py, 1.2.14.1, 1.2.14.2 test_re.py, 1.31.18.2, 1.31.18.3 test_richcmp.py, 1.6.20.2, 1.6.20.3 test_set.py, 1.16.4.1, 1.16.4.2 test_sets.py, 1.23.6.2, 1.23.6.3 test_site.py, 1.6.6.1, 1.6.6.2 test_socket.py, 1.39.2.2, 1.39.2.3 test_str.py, 1.2.4.2, 1.2.4.3 test_strptime.py, 1.11.4.2, 1.11.4.3 test_subprocess.py, 1.17.2.1, 1.17.2.2 
test_sundry.py, 1.10.2.2, 1.10.2.3 test_support.py, 1.41.2.2, 1.41.2.3 test_sys.py, 1.6.6.2, 1.6.6.3 test_tarfile.py, 1.7.4.2, 1.7.4.3 test_tempfile.py, 1.1.10.2, 1.1.10.3 test_textwrap.py, 1.18.10.2, 1.18.10.3 test_threading.py, 1.2.18.1, 1.2.18.2 test_tokenize.py, 1.6.10.2, 1.6.10.3 test_trace.py, 1.7.10.3, 1.7.10.4 test_unicode.py, 1.58.2.2, 1.58.2.3 test_unicode_file.py, 1.4.2.2, 1.4.2.3 test_unicodedata.py, 1.5.26.2, 1.5.26.3 test_urllib.py, 1.8.26.2, 1.8.26.3 test_urllib2.py, 1.6.10.2, 1.6.10.3 test_urlparse.py, 1.5.2.2, 1.5.2.3 test_userdict.py, 1.5.10.2, 1.5.10.3 test_userstring.py, 1.6.2.2, 1.6.2.3 test_weakref.py, 1.19.2.2, 1.19.2.3 test_xmlrpc.py, 1.2.10.2, 1.2.10.3 test_zlib.py, 1.17.2.2, 1.17.2.3 Message-ID: <20051016052441.920FC1E400C@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Lib/test Modified Files: Tag: ast-branch inspect_fodder2.py list_tests.py regrtest.py seq_tests.py string_tests.py test_StringIO.py test__locale.py test_al.py test_array.py test_asynchat.py test_base64.py test_binop.py test_bisect.py test_builtin.py test_bz2.py test_cd.py test_cl.py test_class.py test_codeccallbacks.py test_codecs.py test_compiler.py test_complex.py test_cookie.py test_cookielib.py test_csv.py test_curses.py test_datetime.py test_decimal.py test_deque.py test_descr.py test_descrtut.py test_doctest.py test_doctest2.py test_dumbdbm.py test_enumerate.py test_fcntl.py test_file.py test_funcattrs.py test_generators.py test_genexps.py test_getargs2.py test_glob.py test_grammar.py test_gzip.py test_hmac.py test_inspect.py test_ioctl.py test_isinstance.py test_iterlen.py test_itertools.py test_locale.py test_long.py test_macfs.py test_marshal.py test_minidom.py test_mmap.py test_mutants.py test_new.py test_operator.py test_os.py test_ossaudiodev.py test_parser.py test_popen2.py test_posix.py test_posixpath.py test_profile.py test_re.py test_richcmp.py test_set.py test_sets.py 
test_site.py test_socket.py test_str.py test_strptime.py test_subprocess.py test_sundry.py test_support.py test_sys.py test_tarfile.py test_tempfile.py test_textwrap.py test_threading.py test_tokenize.py test_trace.py test_unicode.py test_unicode_file.py test_unicodedata.py test_urllib.py test_urllib2.py test_urlparse.py test_userdict.py test_userstring.py test_weakref.py test_xmlrpc.py test_zlib.py Added Files: Tag: ast-branch bad_coding.py test_cmd_line.py test_coding.py test_float.py test_functional.py test_hashlib.py test_hashlib_speed.py test_xdrlib.py Log Message: Merge head to branch (for the last time) --- NEW FILE: bad_coding.py --- # -*- coding: uft-8 -*- --- NEW FILE: test_cmd_line.py --- import test.test_support, unittest import sys import popen2 class CmdLineTest(unittest.TestCase): def start_python(self, cmd_line): outfp, infp = popen2.popen4('%s %s' % (sys.executable, cmd_line)) infp.close() data = outfp.read() outfp.close() return data def test_directories(self): self.assertTrue('is a directory' in self.start_python('.')) self.assertTrue('is a directory' in self.start_python('< .')) def verify_valid_flag(self, cmd_line): data = self.start_python(cmd_line) self.assertTrue(data == '' or data.endswith('\n')) self.assertTrue('Traceback' not in data) def test_environment(self): self.verify_valid_flag('-E') def test_optimize(self): self.verify_valid_flag('-O') self.verify_valid_flag('-OO') def test_q(self): self.verify_valid_flag('-Qold') self.verify_valid_flag('-Qnew') self.verify_valid_flag('-Qwarn') self.verify_valid_flag('-Qwarnall') def test_site_flag(self): self.verify_valid_flag('-S') def test_usage(self): self.assertTrue('usage' in self.start_python('-h')) def test_version(self): version = 'Python %d.%d' % sys.version_info[:2] self.assertTrue(self.start_python('-V').startswith(version)) def test_main(): test.test_support.run_unittest(CmdLineTest) if __name__ == "__main__": test_main() --- NEW FILE: test_coding.py --- import test.test_support, 
unittest import os class CodingTest(unittest.TestCase): def test_bad_coding(self): module_name = 'bad_coding' self.assertRaises(SyntaxError, __import__, 'test.' + module_name) path = os.path.dirname(__file__) filename = os.path.join(path, module_name + '.py') fp = open(filename) text = fp.read() fp.close() self.assertRaises(SyntaxError, compile, text, filename, 'exec') def test_main(): test.test_support.run_unittest(CodingTest) if __name__ == "__main__": test_main() --- NEW FILE: test_float.py --- import unittest, struct from test import test_support class FormatFunctionsTestCase(unittest.TestCase): def setUp(self): self.save_formats = {'double':float.__getformat__('double'), 'float':float.__getformat__('float')} def tearDown(self): float.__setformat__('double', self.save_formats['double']) float.__setformat__('float', self.save_formats['float']) def test_getformat(self): self.assert_(float.__getformat__('double') in ['unknown', 'IEEE, big-endian', 'IEEE, little-endian']) self.assert_(float.__getformat__('float') in ['unknown', 'IEEE, big-endian', 'IEEE, little-endian']) self.assertRaises(ValueError, float.__getformat__, 'chicken') self.assertRaises(TypeError, float.__getformat__, 1) def test_setformat(self): for t in 'double', 'float': float.__setformat__(t, 'unknown') if self.save_formats[t] == 'IEEE, big-endian': self.assertRaises(ValueError, float.__setformat__, t, 'IEEE, little-endian') elif self.save_formats[t] == 'IEEE, little-endian': self.assertRaises(ValueError, float.__setformat__, t, 'IEEE, big-endian') else: self.assertRaises(ValueError, float.__setformat__, t, 'IEEE, big-endian') self.assertRaises(ValueError, float.__setformat__, t, 'IEEE, little-endian') self.assertRaises(ValueError, float.__setformat__, t, 'chicken') self.assertRaises(ValueError, float.__setformat__, 'chicken', 'unknown') BE_DOUBLE_INF = '\x7f\xf0\x00\x00\x00\x00\x00\x00' LE_DOUBLE_INF = ''.join(reversed(BE_DOUBLE_INF)) BE_DOUBLE_NAN = '\x7f\xf8\x00\x00\x00\x00\x00\x00' 
LE_DOUBLE_NAN = ''.join(reversed(BE_DOUBLE_NAN)) BE_FLOAT_INF = '\x7f\x80\x00\x00' LE_FLOAT_INF = ''.join(reversed(BE_FLOAT_INF)) BE_FLOAT_NAN = '\x7f\xc0\x00\x00' LE_FLOAT_NAN = ''.join(reversed(BE_FLOAT_NAN)) # on non-IEEE platforms, attempting to unpack a bit pattern # representing an infinity or a NaN should raise an exception. class UnknownFormatTestCase(unittest.TestCase): def setUp(self): self.save_formats = {'double':float.__getformat__('double'), 'float':float.__getformat__('float')} float.__setformat__('double', 'unknown') float.__setformat__('float', 'unknown') def tearDown(self): float.__setformat__('double', self.save_formats['double']) float.__setformat__('float', self.save_formats['float']) def test_double_specials_dont_unpack(self): for fmt, data in [('>d', BE_DOUBLE_INF), ('>d', BE_DOUBLE_NAN), ('f', BE_FLOAT_INF), ('>f', BE_FLOAT_NAN), ('d', BE_DOUBLE_INF), ('>d', BE_DOUBLE_NAN), ('f', BE_FLOAT_INF), ('>f', BE_FLOAT_NAN), ('> 4) & 0xF] + h[i & 0xF] return r class HashLibTestCase(unittest.TestCase): supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', 'sha224', 'SHA224', 'sha256', 'SHA256', 'sha384', 'SHA384', 'sha512', 'SHA512' ) def test_unknown_hash(self): try: hashlib.new('spam spam spam spam spam') except ValueError: pass else: self.assert_(0 == "hashlib didn't reject bogus hash name") def test_hexdigest(self): for name in self.supported_hash_names: h = hashlib.new(name) self.assert_(hexstr(h.digest()) == h.hexdigest()) def test_large_update(self): aas = 'a' * 128 bees = 'b' * 127 cees = 'c' * 126 for name in self.supported_hash_names: m1 = hashlib.new(name) m1.update(aas) m1.update(bees) m1.update(cees) m2 = hashlib.new(name) m2.update(aas + bees + cees) self.assertEqual(m1.digest(), m2.digest()) def check(self, name, data, digest): # test the direct constructors computed = getattr(hashlib, name)(data).hexdigest() self.assert_(computed == digest) # test the general new() interface computed = hashlib.new(name, data).hexdigest() 
self.assert_(computed == digest) def test_case_md5_0(self): self.check('md5', '', 'd41d8cd98f00b204e9800998ecf8427e') def test_case_md5_1(self): self.check('md5', 'abc', '900150983cd24fb0d6963f7d28e17f72') def test_case_md5_2(self): self.check('md5', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', 'd174ab98d277d9f5a5611c2c9f419d9f') # use the three examples from Federal Information Processing Standards # Publication 180-1, Secure Hash Standard, 1995 April 17 # http://www.itl.nist.gov/div897/pubs/fip180-1.htm def test_case_sha1_0(self): self.check('sha1', "", "da39a3ee5e6b4b0d3255bfef95601890afd80709") def test_case_sha1_1(self): self.check('sha1', "abc", "a9993e364706816aba3e25717850c26c9cd0d89d") def test_case_sha1_2(self): self.check('sha1', "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "84983e441c3bd26ebaae4aa1f95129e5e54670f1") def test_case_sha1_3(self): self.check('sha1', "a" * 1000000, "34aa973cd4c4daa4f61eeb2bdbad27316534016f") # use the examples from Federal Information Processing Standards # Publication 180-2, Secure Hash Standard, 2002 August 1 # http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf def test_case_sha224_0(self): self.check('sha224', "", "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f") def test_case_sha224_1(self): self.check('sha224', "abc", "23097d223405d8228642a477bda255b32aadbce4bda0b3f7e36c9da7") def test_case_sha224_2(self): self.check('sha224', "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "75388b16512776cc5dba5da1fd890150b0c6455cb4f58b1952522525") def test_case_sha224_3(self): self.check('sha224', "a" * 1000000, "20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67") def test_case_sha256_0(self): self.check('sha256', "", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") def test_case_sha256_1(self): self.check('sha256', "abc", "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad") def test_case_sha256_2(self): self.check('sha256', 
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1") def test_case_sha256_3(self): self.check('sha256', "a" * 1000000, "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0") def test_case_sha384_0(self): self.check('sha384', "", "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da"+ "274edebfe76f65fbd51ad2f14898b95b") def test_case_sha384_1(self): self.check('sha384', "abc", "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed"+ "8086072ba1e7cc2358baeca134c825a7") def test_case_sha384_2(self): self.check('sha384', "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", "09330c33f71147e83d192fc782cd1b4753111b173b3b05d22fa08086e3b0f712"+ "fcc7c71a557e2db966c3e9fa91746039") def test_case_sha384_3(self): self.check('sha384', "a" * 1000000, "9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b"+ "07b8b3dc38ecc4ebae97ddd87f3d8985") def test_case_sha512_0(self): self.check('sha512', "", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce"+ "47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e") def test_case_sha512_1(self): self.check('sha512', "abc", "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a"+ "2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f") def test_case_sha512_2(self): self.check('sha512', "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", "8e959b75dae313da8cf4f72814fc143f8f7779c6eb9f7fa17299aeadb6889018"+ "501d289e4900f7e4331b99dec4b5433ac7d329eeb6dd26545e96e55b874be909") def test_case_sha512_3(self): self.check('sha512', "a" * 1000000, "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+ "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b") def test_main(): test_support.run_unittest(HashLibTestCase) if __name__ == 
"__main__": test_main() --- NEW FILE: test_hashlib_speed.py --- import sys, time import hashlib from test import test_support def creatorFunc(): raise RuntimeError, "eek, creatorFunc not overridden" def test_scaled_msg(scale, name): iterations = 106201/scale * 20 longStr = 'Z'*scale localCF = creatorFunc start = time.time() for f in xrange(iterations): x = localCF(longStr).digest() end = time.time() print ('%2.2f' % (end-start)), "seconds", iterations, "x", len(longStr), "bytes", name def test_create(): start = time.time() for f in xrange(20000): d = creatorFunc() end = time.time() print ('%2.2f' % (end-start)), "seconds", '[20000 creations]' def test_zero(): start = time.time() for f in xrange(20000): x = creatorFunc().digest() end = time.time() print ('%2.2f' % (end-start)), "seconds", '[20000 "" digests]' ### this 'test' is not normally run. skip it if the test runner finds it if __name__ != '__main__': raise test_support.TestSkipped, "not a unit test (stand alone benchmark)" hName = sys.argv[1] # # setup our creatorFunc to test the requested hash # if hName in ('_md5', '_sha'): exec 'import '+hName exec 'creatorFunc = '+hName+'.new' print "testing speed of old", hName, "legacy interface" elif hName == '_hashlib' and len(sys.argv) > 3: import _hashlib exec 'creatorFunc = _hashlib.%s' % sys.argv[2] print "testing speed of _hashlib.%s" % sys.argv[2], getattr(_hashlib, sys.argv[2]) elif hName == '_hashlib' and len(sys.argv) == 3: import _hashlib exec 'creatorFunc = lambda x=_hashlib.new : x(%r)' % sys.argv[2] print "testing speed of _hashlib.new(%r)" % sys.argv[2] elif hasattr(hashlib, hName) and callable(getattr(hashlib, hName)): creatorFunc = getattr(hashlib, hName) print "testing speed of hashlib."+hName, getattr(hashlib, hName) else: exec "creatorFunc = lambda x=hashlib.new : x(%r)" % hName print "testing speed of hashlib.new(%r)" % hName try: test_create() except ValueError: print print "pass argument(s) naming the hash to run a speed test on:" print " '_md5' 
and '_sha' test the legacy builtin md5 and sha" print " '_hashlib' 'openssl_hName' 'fast' tests the builtin _hashlib" print " '_hashlib' 'hName' tests builtin _hashlib.new(shaFOO)" print " 'hName' tests the hashlib.hName() implementation if it exists" print " otherwise it uses hashlib.new(hName)." print raise test_zero() test_scaled_msg(scale=106201, name='[huge data]') test_scaled_msg(scale=10620, name='[large data]') test_scaled_msg(scale=1062, name='[medium data]') test_scaled_msg(scale=424, name='[4*small data]') test_scaled_msg(scale=336, name='[3*small data]') test_scaled_msg(scale=212, name='[2*small data]') test_scaled_msg(scale=106, name='[small data]') test_scaled_msg(scale=creatorFunc().digest_size, name='[digest_size data]') test_scaled_msg(scale=10, name='[tiny data]') --- NEW FILE: test_xdrlib.py --- import xdrlib xdrlib._test() Index: inspect_fodder2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/inspect_fodder2.py,v retrieving revision 1.2.2.1 retrieving revision 1.2.2.2 diff -u -d -r1.2.2.1 -r1.2.2.2 --- inspect_fodder2.py 7 Jan 2005 06:58:48 -0000 1.2.2.1 +++ inspect_fodder2.py 16 Oct 2005 05:24:00 -0000 1.2.2.2 @@ -1,25 +1,25 @@ # line 1 def wrap(foo=None): - def wrapper(func): - return func - return wrapper + def wrapper(func): + return func + return wrapper # line 7 def replace(func): - def insteadfunc(): - print 'hello' - return insteadfunc + def insteadfunc(): + print 'hello' + return insteadfunc # line 13 @wrap() @wrap(wrap) def wrapped(): - pass + pass # line 19 @replace def gone(): - pass + pass # line 24 oll = lambda m: m @@ -53,3 +53,38 @@ def setfunc(func): globals()["anonymous"] = func setfunc(lambda x, y: x*y) + +# line 57 +def with_comment(): # hello + world + +# line 61 +multiline_sig = [ + lambda (x, + y): x+y, + None, + ] + +# line 68 +def func69(): + class cls70: + def func71(): + pass + return cls70 +extra74 = 74 + +# line 76 +def func77(): pass 
+(extra78, stuff78) = 'xy' +extra79 = 'stop' + +# line 81 +class cls82: + def func83(): pass +(extra84, stuff84) = 'xy' +extra85 = 'stop' + +# line 87 +def func88(): + # comment + return 90 Index: list_tests.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/list_tests.py,v retrieving revision 1.5.4.1 retrieving revision 1.5.4.2 diff -u -d -r1.5.4.1 -r1.5.4.2 --- list_tests.py 7 Jan 2005 06:58:48 -0000 1.5.4.1 +++ list_tests.py 16 Oct 2005 05:24:00 -0000 1.5.4.2 @@ -309,6 +309,26 @@ a = self.type2test([0, 1, 2, 3]) self.assertRaises(BadExc, a.remove, BadCmp()) + class BadCmp2: + def __eq__(self, other): + raise BadExc() + + d = self.type2test('abcdefghcij') + d.remove('c') + self.assertEqual(d, self.type2test('abdefghcij')) + d.remove('c') + self.assertEqual(d, self.type2test('abdefghij')) + self.assertRaises(ValueError, d.remove, 'c') + self.assertEqual(d, self.type2test('abdefghij')) + + # Handle comparison errors + d = self.type2test(['a', 'b', BadCmp2(), 'c']) + e = self.type2test(d) + self.assertRaises(BadExc, d.remove, 'c') + for x, y in zip(d, e): + # verify that original order and values are retained. 
+ self.assert_(x is y) + def test_count(self): a = self.type2test([0, 1, 2])*3 self.assertEqual(a.count(0), 3) @@ -494,3 +514,12 @@ a = self.type2test(range(10)) a[::2] = tuple(range(5)) self.assertEqual(a, self.type2test([0, 1, 1, 3, 2, 5, 3, 7, 4, 9])) + + def test_constructor_exception_handling(self): + # Bug #1242657 + class F(object): + def __iter__(self): + yield 23 + def __len__(self): + raise KeyboardInterrupt + self.assertRaises(KeyboardInterrupt, list, F()) Index: regrtest.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/regrtest.py,v retrieving revision 1.87.2.2 retrieving revision 1.87.2.3 diff -u -d -r1.87.2.2 -r1.87.2.3 --- regrtest.py 7 Jan 2005 06:58:48 -0000 1.87.2.2 +++ regrtest.py 16 Oct 2005 05:24:00 -0000 1.87.2.3 @@ -91,7 +91,11 @@ compiler - Test the compiler package by compiling all the source in the standard library and test suite. This takes - a long time. + a long time. Enabling this resource also allows + test_tokenize to verify round-trip lexing on every + file in the test library. + + subprocess Run all tests for the subprocess module. To enable all resources except one, use '-uall,-<resource>'.
For example, to run all the tests except for the bsddb tests, give the @@ -136,7 +140,7 @@ from test import test_support RESOURCE_NAMES = ('audio', 'curses', 'largefile', 'network', 'bsddb', - 'decimal', 'compiler') + 'decimal', 'compiler', 'subprocess') def usage(code, msg=''): @@ -487,6 +491,7 @@ import gc def cleanup(): import _strptime, urlparse, warnings, dircache + import linecache from distutils.dir_util import _path_created _path_created.clear() warnings.filters[:] = fs @@ -499,6 +504,7 @@ sys.path_importer_cache.clear() sys.path_importer_cache.update(pic) dircache.reset() + linecache.clearcache() if indirect_test: def run_the_test(): indirect_test() @@ -1064,9 +1070,38 @@ test_winreg test_winsound """, + 'aix5': + """ + test_aepack + test_al + test_applesingle + test_bsddb + test_bsddb185 + test_bsddb3 + test_bz2 + test_cd + test_cl + test_dl + test_gdbm + test_gl + test_gzip + test_imgfile + test_linuxaudiodev + test_macfs + test_macostools + test_nis + test_ossaudiodev + test_sunaudiodev + test_tcl + test_winreg + test_winsound + test_zipimport + test_zlib + """, } _expectations['freebsd5'] = _expectations['freebsd4'] _expectations['freebsd6'] = _expectations['freebsd4'] +_expectations['freebsd7'] = _expectations['freebsd4'] class _ExpectedSkips: def __init__(self): @@ -1083,6 +1118,9 @@ s = _expectations[sys.platform] self.expected = set(s.split()) + # this isn't a regularly run unit test, it is always skipped + self.expected.add('test_hashlib_speed') + if not os.path.supports_unicode_filenames: self.expected.add('test_pep277') Index: seq_tests.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/seq_tests.py,v retrieving revision 1.3.4.1 retrieving revision 1.3.4.2 diff -u -d -r1.3.4.1 -r1.3.4.2 --- seq_tests.py 7 Jan 2005 06:58:49 -0000 1.3.4.1 +++ seq_tests.py 16 Oct 2005 05:24:00 -0000 1.3.4.2 @@ -5,6 +5,85 @@ import unittest from test import test_support +# Various iterables +# 
This is used for checking the constructor (here and in test_deque.py) +def iterfunc(seqn): + 'Regular generator' + for i in seqn: + yield i + +class Sequence: + 'Sequence using __getitem__' + def __init__(self, seqn): + self.seqn = seqn + def __getitem__(self, i): + return self.seqn[i] + +class IterFunc: + 'Sequence using iterator protocol' + def __init__(self, seqn): + self.seqn = seqn + self.i = 0 + def __iter__(self): + return self + def next(self): + if self.i >= len(self.seqn): raise StopIteration + v = self.seqn[self.i] + self.i += 1 + return v + +class IterGen: + 'Sequence using iterator protocol defined with a generator' + def __init__(self, seqn): + self.seqn = seqn + self.i = 0 + def __iter__(self): + for val in self.seqn: + yield val + +class IterNextOnly: + 'Missing __getitem__ and __iter__' + def __init__(self, seqn): + self.seqn = seqn + self.i = 0 + def next(self): + if self.i >= len(self.seqn): raise StopIteration + v = self.seqn[self.i] + self.i += 1 + return v + +class IterNoNext: + 'Iterator missing next()' + def __init__(self, seqn): + self.seqn = seqn + self.i = 0 + def __iter__(self): + return self + +class IterGenExc: + 'Test propagation of exceptions' + def __init__(self, seqn): + self.seqn = seqn + self.i = 0 + def __iter__(self): + return self + def next(self): + 3 // 0 + +class IterFuncStop: + 'Test immediate stop' + def __init__(self, seqn): + pass + def __iter__(self): + return self + def next(self): + raise StopIteration + +from itertools import chain, imap +def itermulti(seqn): + 'Test multiple tiers of iterators' + return chain(imap(lambda x:x, iterfunc(IterGen(Sequence(seqn))))) + class CommonTest(unittest.TestCase): # The type to be tested type2test = None @@ -40,6 +119,17 @@ vv = self.type2test(s) self.assertEqual(len(vv), len(s)) + # Create from various iteratables + for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)): + for g in (Sequence, IterFunc, IterGen, + itermulti, iterfunc): + 
self.assertEqual(self.type2test(g(s)), self.type2test(s)) + self.assertEqual(self.type2test(IterFuncStop(s)), self.type2test()) + self.assertEqual(self.type2test(c for c in "123"), self.type2test("123")) + self.assertRaises(TypeError, self.type2test, IterNextOnly(s)) + self.assertRaises(TypeError, self.type2test, IterNoNext(s)) + self.assertRaises(ZeroDivisionError, self.type2test, IterGenExc(s)) + def test_truth(self): self.assert_(not self.type2test()) self.assert_(self.type2test([42])) @@ -117,6 +207,33 @@ self.assertRaises(TypeError, u.__contains__) + def test_contains_fake(self): + class AllEq: + # Sequences must use rich comparison against each item + # (unless "is" is true, or an earlier item answered) + # So instances of AllEq must be found in all non-empty sequences. + def __eq__(self, other): + return True + def __hash__(self): + raise NotImplemented + self.assert_(AllEq() not in self.type2test([])) + self.assert_(AllEq() in self.type2test([1])) + + def test_contains_order(self): + # Sequences must test in-order. If a rich comparison has side + # effects, these will be visible to tests against later members. + # In this test, the "side effect" is a short-circuiting raise. 
+ class DoNotTestEq(Exception): + pass + class StopCompares: + def __eq__(self, other): + raise DoNotTestEq + + checkfirst = self.type2test([1, StopCompares()]) + self.assert_(1 in checkfirst) + checklast = self.type2test([StopCompares(), 1]) + self.assertRaises(DoNotTestEq, checklast.__contains__, 1) + def test_len(self): self.assertEqual(len(self.type2test()), 0) self.assertEqual(len(self.type2test([])), 0) Index: string_tests.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/string_tests.py,v retrieving revision 1.18.2.2 retrieving revision 1.18.2.3 diff -u -d -r1.18.2.2 -r1.18.2.3 --- string_tests.py 7 Jan 2005 06:58:49 -0000 1.18.2.2 +++ string_tests.py 16 Oct 2005 05:24:00 -0000 1.18.2.3 @@ -114,6 +114,33 @@ self.checkraises(TypeError, 'hello', 'count') self.checkraises(TypeError, 'hello', 'count', 42) + # For a variety of combinations, + # verify that str.count() matches an equivalent function + # replacing all occurrences and then differencing the string lengths + charset = ['', 'a', 'b'] + digits = 7 + base = len(charset) + teststrings = set() + for i in xrange(base ** digits): + entry = [] + for j in xrange(digits): + i, m = divmod(i, base) + entry.append(charset[m]) + teststrings.add(''.join(entry)) + teststrings = list(teststrings) + for i in teststrings: + i = self.fixtype(i) + n = len(i) + for j in teststrings: + r1 = i.count(j) + if j: + r2, rem = divmod(n - len(i.replace(j, '')), len(j)) + else: + r2, rem = len(i)+1, 0 + if rem or r1 != r2: + self.assertEqual(rem, 0) + self.assertEqual(r1, r2) + def test_find(self): self.checkequal(0, 'abcdefghiabc', 'find', 'abc') self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1) @@ -122,6 +149,31 @@ self.checkraises(TypeError, 'hello', 'find') self.checkraises(TypeError, 'hello', 'find', 42) + # For a variety of combinations, + # verify that str.find() matches __contains__ + # and that the found substring is really at that location + 
charset = ['', 'a', 'b', 'c'] + digits = 5 + base = len(charset) + teststrings = set() + for i in xrange(base ** digits): + entry = [] + for j in xrange(digits): + i, m = divmod(i, base) + entry.append(charset[m]) + teststrings.add(''.join(entry)) + teststrings = list(teststrings) + for i in teststrings: + i = self.fixtype(i) + for j in teststrings: + loc = i.find(j) + r1 = (loc != -1) + r2 = j in i + if r1 != r2: + self.assertEqual(r1, r2) + if loc != -1: + self.assertEqual(i[loc:loc+len(j)], j) + def test_rfind(self): self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc') self.checkequal(12, 'abcdefghiabc', 'rfind', '') @@ -685,26 +737,27 @@ # Additional tests that only work with # 8bit compatible object, i.e. str and UserString - def test_encoding_decoding(self): - codecs = [('rot13', 'uryyb jbeyq'), - ('base64', 'aGVsbG8gd29ybGQ=\n'), - ('hex', '68656c6c6f20776f726c64'), - ('uu', 'begin 666 \n+:&5L;&\\@=V]R;&0 \n \nend\n')] - for encoding, data in codecs: - self.checkequal(data, 'hello world', 'encode', encoding) - self.checkequal('hello world', data, 'decode', encoding) - # zlib is optional, so we make the test optional too... - try: - import zlib - except ImportError: - pass - else: - data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]' - self.checkequal(data, 'hello world', 'encode', 'zlib') - self.checkequal('hello world', data, 'decode', 'zlib') + if test_support.have_unicode: + def test_encoding_decoding(self): + codecs = [('rot13', 'uryyb jbeyq'), + ('base64', 'aGVsbG8gd29ybGQ=\n'), + ('hex', '68656c6c6f20776f726c64'), + ('uu', 'begin 666 \n+:&5L;&\\@=V]R;&0 \n \nend\n')] + for encoding, data in codecs: + self.checkequal(data, 'hello world', 'encode', encoding) + self.checkequal('hello world', data, 'decode', encoding) + # zlib is optional, so we make the test optional too... 
+ try: + import zlib + except ImportError: + pass + else: + data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]' + self.checkequal(data, 'hello world', 'encode', 'zlib') + self.checkequal('hello world', data, 'decode', 'zlib') - self.checkraises(TypeError, 'xyz', 'decode', 42) - self.checkraises(TypeError, 'xyz', 'encode', 42) + self.checkraises(TypeError, 'xyz', 'decode', 42) + self.checkraises(TypeError, 'xyz', 'encode', 42) class MixinStrUnicodeTest: Index: test_StringIO.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_StringIO.py,v retrieving revision 1.13.2.2 retrieving revision 1.13.2.3 diff -u -d -r1.13.2.2 -r1.13.2.3 --- test_StringIO.py 7 Jan 2005 06:58:49 -0000 1.13.2.2 +++ test_StringIO.py 16 Oct 2005 05:24:00 -0000 1.13.2.3 @@ -44,6 +44,13 @@ f.seek(0) self.assertEqual(f.getvalue(), 'abc') + def test_writelines_error(self): + def errorGen(): + yield 'a' + raise KeyboardInterrupt() + f = self.MODULE.StringIO() + self.assertRaises(KeyboardInterrupt, f.writelines, errorGen()) + def test_truncate(self): eq = self.assertEqual f = self.MODULE.StringIO() Index: test__locale.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test__locale.py,v retrieving revision 1.8.4.1 retrieving revision 1.8.4.2 diff -u -d -r1.8.4.1 -r1.8.4.2 --- test__locale.py 7 Jan 2005 06:58:49 -0000 1.8.4.1 +++ test__locale.py 16 Oct 2005 05:24:00 -0000 1.8.4.2 @@ -2,6 +2,12 @@ from _locale import (setlocale, LC_NUMERIC, RADIXCHAR, THOUSEP, nl_langinfo, localeconv, Error) import unittest +from platform import uname + +if uname()[0] == "Darwin": + maj, min, mic = [int(part) for part in uname()[2].split(".")] + if (maj, min, mic) < (8, 0, 0): + raise TestSkipped("locale support broken for OS X < 10.4") candidate_locales = ['es_UY', 'fr_FR', 'fi_FI', 'es_CO', 'pt_PT', 'it_IT', 'et_EE', 'es_PY', 'no_NO', 'nl_NL', 'lv_LV', 'el_GR', 
'be_BY', 'fr_BE', @@ -11,9 +17,14 @@ 'da_DK', 'nn_NO', 'cs_CZ', 'de_LU', 'es_BO', 'sq_AL', 'sk_SK', 'fr_CH', 'de_DE', 'sr_YU', 'br_FR', 'nl_BE', 'sv_FI', 'pl_PL', 'fr_CA', 'fo_FO', 'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA', - 'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', + 'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'en_US', 'es_ES.ISO8859-1', 'fr_FR.ISO8859-15', 'ru_RU.KOI8-R', 'ko_KR.eucKR'] +# List known locale values to test against when available. +# Dict formatted as ``<locale> : (<decimal_point>, <thousands_sep>)``. If a +# value is not known, use '' . +known_numerics = {'fr_FR' : (',', ''), 'en_US':('.', ',')} + class _LocaleTests(unittest.TestCase): def setUp(self): @@ -22,7 +33,50 @@ def tearDown(self): setlocale(LC_NUMERIC, self.oldlocale) - def test_lc_numeric(self): + # Want to know what value was calculated, what it was compared against, + # what function was used for the calculation, what type of data was used, + # the locale that was supposedly set, and the actual locale that is set.
+ lc_numeric_err_msg = "%s != %s (%s for %s; set to %s, using %s)" + + def numeric_tester(self, calc_type, calc_value, data_type, used_locale): + """Compare calculation against known value, if available""" + try: + set_locale = setlocale(LC_NUMERIC) + except Error: + set_locale = "" + known_value = known_numerics.get(used_locale, + ('', ''))[data_type is 'thousands_sep'] + if known_value and calc_value: + self.assertEquals(calc_value, known_value, + self.lc_numeric_err_msg % ( + calc_value, known_value, + calc_type, data_type, set_locale, + used_locale)) + + def test_lc_numeric_nl_langinfo(self): + # Test nl_langinfo against known values + for loc in candidate_locales: + try: + setlocale(LC_NUMERIC, loc) + except Error: + continue + for li, lc in ((RADIXCHAR, "decimal_point"), + (THOUSEP, "thousands_sep")): + self.numeric_tester('nl_langinfo', nl_langinfo(li), lc, loc) + + def test_lc_numeric_localeconv(self): + # Test localeconv against known values + for loc in candidate_locales: + try: + setlocale(LC_NUMERIC, loc) + except Error: + continue + for li, lc in ((RADIXCHAR, "decimal_point"), + (THOUSEP, "thousands_sep")): + self.numeric_tester('localeconv', localeconv()[lc], lc, loc) + + def test_lc_numeric_basic(self): + # Test nl_langinfo against localeconv for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) @@ -32,18 +86,17 @@ (THOUSEP, "thousands_sep")): nl_radixchar = nl_langinfo(li) li_radixchar = localeconv()[lc] - # Both with seeing what the locale is set to in order to detect - # when setlocale lies and says it accepted the locale setting - # but in actuality didn't use it (as seen in OS X 10.3) try: set_locale = setlocale(LC_NUMERIC) except Error: set_locale = "" self.assertEquals(nl_radixchar, li_radixchar, - "%s != %s (%s); " - "supposed to be %s, set to %s" % - (nl_radixchar, li_radixchar, lc, - loc, set_locale)) + "%s (nl_langinfo) != %s (localeconv) " + "(set to %s, using %s)" % ( + nl_radixchar, li_radixchar, + loc, set_locale)) + + def 
test_main(): run_unittest(_LocaleTests) Index: test_al.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_al.py,v retrieving revision 1.6.26.1 retrieving revision 1.6.26.2 diff -u -d -r1.6.26.1 -r1.6.26.2 --- test_al.py 28 Apr 2003 17:29:11 -0000 1.6.26.1 +++ test_al.py 16 Oct 2005 05:24:00 -0000 1.6.26.2 @@ -8,7 +8,7 @@ alattrs = ['__doc__', '__name__', 'getdefault', 'getminmax', 'getname', 'getparams', 'newconfig', 'openport', 'queryparams', 'setparams'] -# This is a very unobtrusive test for the existence of the al module and all it's +# This is a very unobtrusive test for the existence of the al module and all its # attributes. More comprehensive examples can be found in Demo/al def main(): Index: test_array.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_array.py,v retrieving revision 1.16.2.2 retrieving revision 1.16.2.3 diff -u -d -r1.16.2.2 -r1.16.2.3 --- test_array.py 7 Jan 2005 06:58:49 -0000 1.16.2.2 +++ test_array.py 16 Oct 2005 05:24:00 -0000 1.16.2.3 @@ -428,6 +428,11 @@ ) self.assertEqual( + a[2:1], + array.array(self.typecode) + ) + + self.assertEqual( a[1000:], array.array(self.typecode) ) Index: test_asynchat.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_asynchat.py,v retrieving revision 1.4.24.1 retrieving revision 1.4.24.2 diff -u -d -r1.4.24.1 -r1.4.24.2 --- test_asynchat.py 7 Jan 2005 06:58:49 -0000 1.4.24.1 +++ test_asynchat.py 16 Oct 2005 05:24:00 -0000 1.4.24.2 @@ -2,9 +2,11 @@ import thread # If this fails, we can't test this module import asyncore, asynchat, socket, threading, time +import unittest +from test import test_support HOST = "127.0.0.1" -PORT = 54321 +PORT = 54322 class echo_server(threading.Thread): @@ -16,7 +18,7 @@ conn, client = sock.accept() buffer = "" while "\n" not in buffer: - data = 
conn.recv(10) + data = conn.recv(1) if not data: break buffer = buffer + data @@ -28,31 +30,63 @@ class echo_client(asynchat.async_chat): - def __init__(self): + def __init__(self, terminator): asynchat.async_chat.__init__(self) + self.contents = None self.create_socket(socket.AF_INET, socket.SOCK_STREAM) self.connect((HOST, PORT)) - self.set_terminator("\n") + self.set_terminator(terminator) self.buffer = "" def handle_connect(self): - print "Connected" + pass + ##print "Connected" def collect_incoming_data(self, data): self.buffer = self.buffer + data def found_terminator(self): - print "Received:", repr(self.buffer) + #print "Received:", repr(self.buffer) + self.contents = self.buffer self.buffer = "" self.close() -def main(): - s = echo_server() - s.start() - time.sleep(1) # Give server time to initialize - c = echo_client() - c.push("hello ") - c.push("world\n") - asyncore.loop() -main() +class TestAsynchat(unittest.TestCase): + def setUp (self): + pass + + def tearDown (self): + pass + + def test_line_terminator(self): + s = echo_server() + s.start() + time.sleep(1) # Give server time to initialize + c = echo_client('\n') + c.push("hello ") + c.push("world\n") + asyncore.loop() + s.join() + + self.assertEqual(c.contents, 'hello world') + + def test_numeric_terminator(self): + # Try reading a fixed number of bytes + s = echo_server() + s.start() + time.sleep(1) # Give server time to initialize + c = echo_client(6L) + c.push("hello ") + c.push("world\n") + asyncore.loop() + s.join() + + self.assertEqual(c.contents, 'hello ') + + +def test_main(verbose=None): + test_support.run_unittest(TestAsynchat) + +if __name__ == "__main__": + test_main(verbose=True) Index: test_base64.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_base64.py,v retrieving revision 1.2.2.2 retrieving revision 1.2.2.3 diff -u -d -r1.2.2.2 -r1.2.2.3 --- test_base64.py 7 Jan 2005 06:58:49 -0000 1.2.2.2 +++ 
test_base64.py 16 Oct 2005 05:24:00 -0000 1.2.2.3 @@ -60,6 +60,7 @@ eq = self.assertEqual # Test default alphabet eq(base64.b64encode("www.python.org"), "d3d3LnB5dGhvbi5vcmc=") + eq(base64.b64encode('\x00'), 'AA==') eq(base64.b64encode("a"), "YQ==") eq(base64.b64encode("ab"), "YWI=") eq(base64.b64encode("abc"), "YWJj") @@ -90,6 +91,7 @@ def test_b64decode(self): eq = self.assertEqual eq(base64.b64decode("d3d3LnB5dGhvbi5vcmc="), "www.python.org") + eq(base64.b64decode('AA=='), '\x00') eq(base64.b64decode("YQ=="), "a") eq(base64.b64decode("YWI="), "ab") eq(base64.b64decode("YWJj"), "abc") @@ -123,6 +125,7 @@ def test_b32encode(self): eq = self.assertEqual eq(base64.b32encode(''), '') + eq(base64.b32encode('\x00'), 'AA======') eq(base64.b32encode('a'), 'ME======') eq(base64.b32encode('ab'), 'MFRA====') eq(base64.b32encode('abc'), 'MFRGG===') @@ -132,6 +135,7 @@ def test_b32decode(self): eq = self.assertEqual eq(base64.b32decode(''), '') + eq(base64.b32decode('AA======'), '\x00') eq(base64.b32decode('ME======'), 'a') eq(base64.b32decode('MFRA===='), 'ab') eq(base64.b32decode('MFRGG==='), 'abc') @@ -166,10 +170,12 @@ def test_b16encode(self): eq = self.assertEqual eq(base64.b16encode('\x01\x02\xab\xcd\xef'), '0102ABCDEF') + eq(base64.b16encode('\x00'), '00') def test_b16decode(self): eq = self.assertEqual eq(base64.b16decode('0102ABCDEF'), '\x01\x02\xab\xcd\xef') + eq(base64.b16decode('00'), '\x00') # Lower case is not allowed without a flag self.assertRaises(TypeError, base64.b16decode, '0102abcdef') # Case fold Index: test_binop.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_binop.py,v retrieving revision 1.6.14.1 retrieving revision 1.6.14.2 diff -u -d -r1.6.14.1 -r1.6.14.2 --- test_binop.py 28 Apr 2003 17:29:08 -0000 1.6.14.1 +++ test_binop.py 16 Oct 2005 05:24:00 -0000 1.6.14.2 @@ -178,7 +178,7 @@ return (x, self - other * x) def __rdivmod__(self, other): - "Divide two Rats, returning 
quotient and remainder (reversed args).""" + """Divide two Rats, returning quotient and remainder (reversed args).""" if isint(other): other = Rat(other) elif not isRat(other): Index: test_bisect.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_bisect.py,v retrieving revision 1.1.26.2 retrieving revision 1.1.26.3 diff -u -d -r1.1.26.2 -r1.1.26.3 --- test_bisect.py 7 Jan 2005 06:58:49 -0000 1.1.26.2 +++ test_bisect.py 16 Oct 2005 05:24:00 -0000 1.1.26.3 @@ -130,6 +130,16 @@ def test_backcompatibility(self): self.assertEqual(bisect, bisect_right) + def test_keyword_args(self): + data = [10, 20, 30, 40, 50] + self.assertEqual(bisect_left(a=data, x=25, lo=1, hi=3), 2) + self.assertEqual(bisect_right(a=data, x=25, lo=1, hi=3), 2) + self.assertEqual(bisect(a=data, x=25, lo=1, hi=3), 2) + insort_left(a=data, x=25, lo=1, hi=3) + insort_right(a=data, x=25, lo=1, hi=3) + insort(a=data, x=25, lo=1, hi=3) + self.assertEqual(data, [10, 20, 25, 25, 25, 30, 40, 50]) + #============================================================================== class TestInsort(unittest.TestCase): Index: test_builtin.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_builtin.py,v retrieving revision 1.1.36.2 retrieving revision 1.1.36.3 diff -u -d -r1.1.36.2 -r1.1.36.3 --- test_builtin.py 7 Jan 2005 06:58:49 -0000 1.1.36.2 +++ test_builtin.py 16 Oct 2005 05:24:00 -0000 1.1.36.3 @@ -92,6 +92,14 @@ (unichr(0x200), ValueError), ] +class TestFailingBool: + def __nonzero__(self): + raise RuntimeError + +class TestFailingIter: + def __iter__(self): + raise RuntimeError + class BuiltinTest(unittest.TestCase): def test_import(self): @@ -117,6 +125,34 @@ # str self.assertRaises(TypeError, abs, 'a') + def test_all(self): + self.assertEqual(all([2, 4, 6]), True) + self.assertEqual(all([2, None, 6]), False) + self.assertRaises(RuntimeError, all, [2, 
TestFailingBool(), 6]) + self.assertRaises(RuntimeError, all, TestFailingIter()) + self.assertRaises(TypeError, all, 10) # Non-iterable + self.assertRaises(TypeError, all) # No args + self.assertRaises(TypeError, all, [2, 4, 6], []) # Too many args + self.assertEqual(all([]), True) # Empty iterator + S = [50, 60] + self.assertEqual(all(x > 42 for x in S), True) + S = [50, 40, 60] + self.assertEqual(all(x > 42 for x in S), False) + + def test_any(self): + self.assertEqual(any([None, None, None]), False) + self.assertEqual(any([None, 4, None]), True) + self.assertRaises(RuntimeError, any, [None, TestFailingBool(), 6]) + self.assertRaises(RuntimeError, all, TestFailingIter()) + self.assertRaises(TypeError, any, 10) # Non-iterable + self.assertRaises(TypeError, any) # No args + self.assertRaises(TypeError, any, [2, 4, 6], []) # Too many args + self.assertEqual(any([]), False) # Empty iterator + S = [40, 60, 30] + self.assertEqual(any(x > 42 for x in S), True) + S = [10, 20, 30] + self.assertEqual(any(x > 42 for x in S), False) + def test_apply(self): def f0(*args): self.assertEqual(args, ()) @@ -509,20 +545,53 @@ self.assertEqual(float(unicode(" 3.14 ")), 3.14) self.assertEqual(float(unicode(" \u0663.\u0661\u0664 ",'raw-unicode-escape')), 3.14) + def test_floatconversion(self): + # Make sure that calls to __float__() work properly + class Foo0: + def __float__(self): + return 42. + + class Foo1(object): + def __float__(self): + return 42. + + class Foo2(float): + def __float__(self): + return 42. + + class Foo3(float): + def __new__(cls, value=0.): + return float.__new__(cls, 2*value) + + def __float__(self): + return self + + class Foo4(float): + def __float__(self): + return 42 + + self.assertAlmostEqual(float(Foo0()), 42.) + self.assertAlmostEqual(float(Foo1()), 42.) + self.assertAlmostEqual(float(Foo2()), 42.) + self.assertAlmostEqual(float(Foo3(21)), 42.) 
+ self.assertRaises(TypeError, float, Foo4(42)) + def test_getattr(self): import sys self.assert_(getattr(sys, 'stdout') is sys.stdout) self.assertRaises(TypeError, getattr, sys, 1) self.assertRaises(TypeError, getattr, sys, 1, "foo") self.assertRaises(TypeError, getattr) - self.assertRaises(UnicodeError, getattr, sys, unichr(sys.maxunicode)) + if have_unicode: + self.assertRaises(UnicodeError, getattr, sys, unichr(sys.maxunicode)) def test_hasattr(self): import sys self.assert_(hasattr(sys, 'stdout')) self.assertRaises(TypeError, hasattr, sys, 1) self.assertRaises(TypeError, hasattr) - self.assertRaises(UnicodeError, hasattr, sys, unichr(sys.maxunicode)) + if have_unicode: + self.assertRaises(UnicodeError, hasattr, sys, unichr(sys.maxunicode)) def test_hash(self): hash(None) @@ -614,6 +683,39 @@ self.assertEqual(int('0123', 0), 83) + def test_intconversion(self): + # Test __int__() + class Foo0: + def __int__(self): + return 42 + + class Foo1(object): + def __int__(self): + return 42 + + class Foo2(int): + def __int__(self): + return 42 + + class Foo3(int): + def __int__(self): + return self + + class Foo4(int): + def __int__(self): + return 42L + + class Foo5(int): + def __int__(self): + return 42. 
+ + self.assertEqual(int(Foo0()), 42) + self.assertEqual(int(Foo1()), 42) + self.assertEqual(int(Foo2()), 42) + self.assertEqual(int(Foo3()), 0) + self.assertEqual(int(Foo4()), 42L) + self.assertRaises(TypeError, int, Foo5()) + def test_intern(self): self.assertRaises(TypeError, intern) s = "never interned before" @@ -774,6 +876,39 @@ self.assertRaises(ValueError, long, '53', 40) self.assertRaises(TypeError, long, 1, 12) + def test_longconversion(self): + # Test __long__() + class Foo0: + def __long__(self): + return 42L + + class Foo1(object): + def __long__(self): + return 42L + + class Foo2(long): + def __long__(self): + return 42L + + class Foo3(long): + def __long__(self): + return self + + class Foo4(long): + def __long__(self): + return 42 + + class Foo5(long): + def __long__(self): + return 42. + + self.assertEqual(long(Foo0()), 42L) + self.assertEqual(long(Foo1()), 42L) + self.assertEqual(long(Foo2()), 42L) + self.assertEqual(long(Foo3()), 0) + self.assertEqual(long(Foo4()), 42) + self.assertRaises(TypeError, long, Foo5()) + def test_map(self): self.assertEqual( map(None, 'hello world'), @@ -968,7 +1103,8 @@ if have_unicode: self.assertEqual(ord(unichr(sys.maxunicode)), sys.maxunicode) self.assertRaises(TypeError, ord, 42) - self.assertRaises(TypeError, ord, unicode("12")) + if have_unicode: + self.assertRaises(TypeError, ord, unicode("12")) def test_pow(self): self.assertEqual(pow(0,0), 1) @@ -1274,17 +1410,18 @@ self.assertRaises(ValueError, unichr, sys.maxunicode+1) self.assertRaises(TypeError, unichr) + # We don't want self in vars(), so these are static methods + + @staticmethod def get_vars_f0(): return vars() - # we don't want self in vars(), so use staticmethod - get_vars_f0 = staticmethod(get_vars_f0) + @staticmethod def get_vars_f2(): BuiltinTest.get_vars_f0() a = 1 b = 2 return vars() - get_vars_f2 = staticmethod(get_vars_f2) def test_vars(self): self.assertEqual(set(vars()), set(dir())) @@ -1360,11 +1497,17 @@ def test_inputtypes(self): s = 
'abracadabra' - for T in [unicode, list, tuple]: + types = [list, tuple] + if have_unicode: + types.insert(0, unicode) + for T in types: self.assertEqual(sorted(s), sorted(T(s))) s = ''.join(dict.fromkeys(s).keys()) # unique letters only - for T in [unicode, set, frozenset, list, tuple, dict.fromkeys]: + types = [set, frozenset, list, tuple, dict.fromkeys] + if have_unicode: + types.insert(0, unicode) + for T in types: self.assertEqual(sorted(s), sorted(T(s))) def test_baddecorator(self): Index: test_bz2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_bz2.py,v retrieving revision 1.12.8.2 retrieving revision 1.12.8.3 diff -u -d -r1.12.8.2 -r1.12.8.3 --- test_bz2.py 7 Jan 2005 06:58:49 -0000 1.12.8.2 +++ test_bz2.py 16 Oct 2005 05:24:00 -0000 1.12.8.3 @@ -37,7 +37,7 @@ return bz2.decompress(data) class BZ2FileTest(BaseTest): - "Test MCRYPT type miscelaneous methods." + "Test BZ2File type miscellaneous methods." 
def setUp(self): self.filename = TESTFN @@ -235,6 +235,32 @@ # "Test opening a nonexistent file" self.assertRaises(IOError, BZ2File, "/non/existent") + def testModeU(self): + # Bug #1194181: bz2.BZ2File opened for write with mode "U" + self.createTempFile() + bz2f = BZ2File(self.filename, "U") + bz2f.close() + f = file(self.filename) + f.seek(0, 2) + self.assertEqual(f.tell(), len(self.DATA)) + f.close() + + def testBug1191043(self): + # readlines() for files containing no newline + data = 'BZh91AY&SY\xd9b\x89]\x00\x00\x00\x03\x80\x04\x00\x02\x00\x0c\x00 \x00!\x9ah3M\x13<]\xc9\x14\xe1BCe\x8a%t' + f = open(self.filename, "wb") + f.write(data) + f.close() + bz2f = BZ2File(self.filename) + lines = bz2f.readlines() + bz2f.close() + self.assertEqual(lines, ['Test']) + bz2f = BZ2File(self.filename) + xlines = list(bz2f.xreadlines()) + bz2f.close() + self.assertEqual(lines, ['Test']) + + class BZ2CompressorTest(BaseTest): def testCompress(self): # "Test BZ2Compressor.compress()/flush()" Index: test_cd.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_cd.py,v retrieving revision 1.5.26.1 retrieving revision 1.5.26.2 diff -u -d -r1.5.26.1 -r1.5.26.2 --- test_cd.py 28 Apr 2003 17:29:04 -0000 1.5.26.1 +++ test_cd.py 16 Oct 2005 05:24:00 -0000 1.5.26.2 @@ -10,7 +10,7 @@ 'ident', 'index', 'msftoframe', 'open', 'pnum', 'ptime'] -# This is a very inobtrusive test for the existence of the cd module and all it's +# This is a very inobtrusive test for the existence of the cd module and all its # attributes. 
More comprehensive examples can be found in Demo/cd and # require that you have a CD and a CD ROM drive Index: test_cl.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_cl.py,v retrieving revision 1.5.26.1 retrieving revision 1.5.26.2 diff -u -d -r1.5.26.1 -r1.5.26.2 --- test_cl.py 28 Apr 2003 17:29:02 -0000 1.5.26.1 +++ test_cl.py 16 Oct 2005 05:24:00 -0000 1.5.26.2 @@ -64,7 +64,7 @@ # This is a very inobtrusive test for the existence of the cl -# module and all it's attributes. +# module and all its attributes. def main(): # touch all the attributes of al without doing anything Index: test_class.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_class.py,v retrieving revision 1.8.2.2 retrieving revision 1.8.2.3 diff -u -d -r1.8.2.2 -r1.8.2.3 --- test_class.py 7 Jan 2005 06:58:50 -0000 1.8.2.2 +++ test_class.py 16 Oct 2005 05:24:00 -0000 1.8.2.3 @@ -353,7 +353,7 @@ try: A().a # Raised AttributeError: A instance has no attribute 'a' except AttributeError, x: - if str(x) is not "booh": + if str(x) != "booh": print "attribute error for A().a got masked:", str(x) class E: Index: test_codeccallbacks.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codeccallbacks.py,v retrieving revision 1.10.8.2 retrieving revision 1.10.8.3 diff -u -d -r1.10.8.2 -r1.10.8.3 --- test_codeccallbacks.py 7 Jan 2005 06:58:50 -0000 1.10.8.2 +++ test_codeccallbacks.py 16 Oct 2005 05:24:00 -0000 1.10.8.3 @@ -111,7 +111,7 @@ sout += "\\U%08x" % sys.maxunicode self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) - def test_relaxedutf8(self): + def test_decoderelaxedutf8(self): # This is the test for a decoding callback handler, # that relaxes the UTF-8 minimal encoding restriction. 
# A null byte that is encoded as "\xc0\x80" will be @@ -158,6 +158,35 @@ charmap[ord("?")] = u"XYZ" self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) + def test_decodeunicodeinternal(self): + self.assertRaises( + UnicodeDecodeError, + "\x00\x00\x00\x00\x00".decode, + "unicode-internal", + ) + if sys.maxunicode > 0xffff: + def handler_unicodeinternal(exc): + if not isinstance(exc, UnicodeDecodeError): + raise TypeError("don't know how to handle %r" % exc) + return (u"\x01", 1) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), + u"\u0000" + ) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), + u"\u0000\ufffd" + ) + + codecs.register_error("test.hui", handler_unicodeinternal) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), + u"\u0000\u0001\u0000" + ) + def test_callbacks(self): def handler1(exc): if not isinstance(exc, UnicodeEncodeError) \ @@ -503,7 +532,8 @@ for (enc, bytes) in ( ("ascii", "\xff"), ("utf-8", "\xff"), - ("utf-7", "+x-") + ("utf-7", "+x-"), + ("unicode-internal", "\x00"), ): self.assertRaises( TypeError, Index: test_codecs.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codecs.py,v retrieving revision 1.2.14.2 retrieving revision 1.2.14.3 diff -u -d -r1.2.14.2 -r1.2.14.3 --- test_codecs.py 7 Jan 2005 06:58:50 -0000 1.2.14.2 +++ test_codecs.py 16 Oct 2005 05:24:00 -0000 1.2.14.3 @@ -1,7 +1,7 @@ from test import test_support import unittest import codecs -import StringIO +import sys, StringIO class Queue(object): """ @@ -73,15 +73,88 @@ # reader has to look ahead whether this is a lone \r or a \r\n for size in xrange(80): for lineend in u"\n \r\n \r \u2028".split(): - s = size*u"a" + lineend + u"xxx\n" - self.assertEqual( - getreader(s).readline(keepends=True), - size*u"a" + lineend, - ) - self.assertEqual( - 
getreader(s).readline(keepends=False), - size*u"a", - ) + s = 10*(size*u"a" + lineend + u"xxx\n") + reader = getreader(s) + for i in xrange(10): + self.assertEqual( + reader.readline(keepends=True), + size*u"a" + lineend, + ) + reader = getreader(s) + for i in xrange(10): + self.assertEqual( + reader.readline(keepends=False), + size*u"a", + ) + + def test_bug1175396(self): + s = [ + '<%!--===================================================\r\n', + ' BLOG index page: show recent articles,\r\n', + ' today\'s articles, or articles of a specific date.\r\n', + '========================================================--%>\r\n', + '<%@inputencoding="ISO-8859-1"%>\r\n', + '<%@pagetemplate=TEMPLATE.y%>\r\n', + '<%@import=import frog.util, frog%>\r\n', + '<%@import=import frog.objects%>\r\n', + '<%@import=from frog.storageerrors import StorageError%>\r\n', + '<%\r\n', + '\r\n', + 'import logging\r\n', + 'log=logging.getLogger("Snakelets.logger")\r\n', + '\r\n', + '\r\n', + 'user=self.SessionCtx.user\r\n', + 'storageEngine=self.SessionCtx.storageEngine\r\n', + '\r\n', + '\r\n', + 'def readArticlesFromDate(date, count=None):\r\n', + ' entryids=storageEngine.listBlogEntries(date)\r\n', + ' entryids.reverse() # descending\r\n', + ' if count:\r\n', + ' entryids=entryids[:count]\r\n', + ' try:\r\n', + ' return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n', + ' except StorageError,x:\r\n', + ' log.error("Error loading articles: "+str(x))\r\n', + ' self.abort("cannot load articles")\r\n', + '\r\n', + 'showdate=None\r\n', + '\r\n', + 'arg=self.Request.getArg()\r\n', + 'if arg=="today":\r\n', + ' #-------------------- TODAY\'S ARTICLES\r\n', + ' self.write("

<h2>Today\'s articles</h2>

")\r\n', + ' showdate = frog.util.isodatestr() \r\n', + ' entries = readArticlesFromDate(showdate)\r\n', + 'elif arg=="active":\r\n', + ' #-------------------- ACTIVE ARTICLES redirect\r\n', + ' self.Yredirect("active.y")\r\n', + 'elif arg=="login":\r\n', + ' #-------------------- LOGIN PAGE redirect\r\n', + ' self.Yredirect("login.y")\r\n', + 'elif arg=="date":\r\n', + ' #-------------------- ARTICLES OF A SPECIFIC DATE\r\n', + ' showdate = self.Request.getParameter("date")\r\n', + ' self.write("

<h2>Articles written on %s</h2>

"% frog.util.mediumdatestr(showdate))\r\n', + ' entries = readArticlesFromDate(showdate)\r\n', + 'else:\r\n', + ' #-------------------- RECENT ARTICLES\r\n', + ' self.write("

<h2>Recent articles</h2>

")\r\n', + ' dates=storageEngine.listBlogEntryDates()\r\n', + ' if dates:\r\n', + ' entries=[]\r\n', + ' SHOWAMOUNT=10\r\n', + ' for showdate in dates:\r\n', + ' entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n', + ' if len(entries)>=SHOWAMOUNT:\r\n', + ' break\r\n', + ' \r\n', + ] + stream = StringIO.StringIO("".join(s).encode(self.encoding)) + reader = codecs.getreader(self.encoding)(stream) + for (i, line) in enumerate(reader): + self.assertEqual(line, s[i]) def test_readlinequeue(self): q = Queue() @@ -92,6 +165,7 @@ writer.write(u"foo\r") self.assertEqual(reader.readline(keepends=False), u"foo") writer.write(u"\nbar\r") + self.assertEqual(reader.readline(keepends=False), u"") self.assertEqual(reader.readline(keepends=False), u"bar") writer.write(u"baz") self.assertEqual(reader.readline(keepends=False), u"baz") @@ -101,6 +175,7 @@ writer.write(u"foo\r") self.assertEqual(reader.readline(keepends=True), u"foo\r") writer.write(u"\nbar\r") + self.assertEqual(reader.readline(keepends=True), u"\n") self.assertEqual(reader.readline(keepends=True), u"bar\r") writer.write(u"baz") self.assertEqual(reader.readline(keepends=True), u"baz") @@ -108,6 +183,36 @@ writer.write(u"foo\r\n") self.assertEqual(reader.readline(keepends=True), u"foo\r\n") + def test_bug1098990_a(self): + s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n" + s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n" + s3 = u"next line.\r\n" + + s = (s1+s2+s3).encode(self.encoding) + stream = StringIO.StringIO(s) + reader = codecs.getreader(self.encoding)(stream) + self.assertEqual(reader.readline(), s1) + self.assertEqual(reader.readline(), s2) + self.assertEqual(reader.readline(), s3) + self.assertEqual(reader.readline(), u"") + + def test_bug1098990_b(self): + s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n" + s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n" + s3 = u"stillokay:bbbbxx\r\n" + s4 = u"broken!!!!badbad\r\n" + s5 
= u"againokay.\r\n" + + s = (s1+s2+s3+s4+s5).encode(self.encoding) + stream = StringIO.StringIO(s) + reader = codecs.getreader(self.encoding)(stream) + self.assertEqual(reader.readline(), s1) + self.assertEqual(reader.readline(), s2) + self.assertEqual(reader.readline(), s3) + self.assertEqual(reader.readline(), s4) + self.assertEqual(reader.readline(), s5) + self.assertEqual(reader.readline(), u"") + class UTF16Test(ReadTest): encoding = "utf-16" @@ -129,6 +234,15 @@ f = reader(s) self.assertEquals(f.read(), u"spamspam") + def test_badbom(self): + s = StringIO.StringIO("\xff\xff") + f = codecs.getreader(self.encoding)(s) + self.assertRaises(UnicodeError, f.read) + + s = StringIO.StringIO("\xff\xff\xff\xff") + f = codecs.getreader(self.encoding)(s) + self.assertRaises(UnicodeError, f.read) + def test_partial(self): self.check_partial( u"\x00\xff\u0100\uffff", @@ -339,6 +453,54 @@ for uni, puny in punycode_testcases: self.assertEquals(uni, puny.decode("punycode")) +class UnicodeInternalTest(unittest.TestCase): + def test_bug1251300(self): + # Decoding with unicode_internal used to not correctly handle "code + # points" above 0x10ffff on UCS-4 builds. 
+ if sys.maxunicode > 0xffff: + ok = [ + ("\x00\x10\xff\xff", u"\U0010ffff"), + ("\x00\x00\x01\x01", u"\U00000101"), + ("", u""), + ] + not_ok = [ + "\x7f\xff\xff\xff", + "\x80\x00\x00\x00", + "\x81\x00\x00\x00", + "\x00", + "\x00\x00\x00\x00\x00", + ] + for internal, uni in ok: + if sys.byteorder == "little": + internal = "".join(reversed(internal)) + self.assertEquals(uni, internal.decode("unicode_internal")) + for internal in not_ok: + if sys.byteorder == "little": + internal = "".join(reversed(internal)) + self.assertRaises(UnicodeDecodeError, internal.decode, + "unicode_internal") + + def test_decode_error_attributes(self): + if sys.maxunicode > 0xffff: + try: + "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") + except UnicodeDecodeError, ex: + self.assertEquals("unicode_internal", ex.encoding) + self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object) + self.assertEquals(4, ex.start) + self.assertEquals(8, ex.end) + else: + self.fail() + + def test_decode_callback(self): + if sys.maxunicode > 0xffff: + codecs.register_error("UnicodeInternalTest", codecs.ignore_errors) + decoder = codecs.getdecoder("unicode_internal") + ab = u"ab".encode("unicode_internal") + ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), + "UnicodeInternalTest") + self.assertEquals((u"ab", 12), ignored) + # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html nameprep_tests = [ # 3.1 Map to nothing. 
@@ -516,6 +678,12 @@ def test_builtin(self): self.assertEquals(unicode("python.org", "idna"), u"python.org") + def test_stream(self): + import StringIO + r = codecs.getreader("idna")(StringIO.StringIO("abc")) + r.read(3) + self.assertEquals(r.read(), u"") + class CodecsModuleTest(unittest.TestCase): def test_decode(self): @@ -549,6 +717,22 @@ f = self.reader(self.stream) self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00']) +class Str2StrTest(unittest.TestCase): + + def test_read(self): + sin = "\x80".encode("base64_codec") + reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin)) + sout = reader.read() + self.assertEqual(sout, "\x80") + self.assert_(isinstance(sout, str)) + + def test_readline(self): + sin = "\x80".encode("base64_codec") + reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin)) + sout = reader.readline() + self.assertEqual(sout, "\x80") + self.assert_(isinstance(sout, str)) + all_unicode_encodings = [ "ascii", "base64_codec", @@ -716,6 +900,21 @@ decodedresult += reader.read() self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding)) + def test_seek(self): + # all codecs should be able to encode these + s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456") + for encoding in all_unicode_encodings: + if encoding == "idna": # FIXME: See SF bug #1163178 + continue + if encoding in broken_unicode_with_streams: + continue + reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding))) + for t in xrange(5): + # Test that calling seek resets the internal codec state and buffers + reader.seek(0, 0) + line = reader.readline() + self.assertEqual(s[:len(line)], line) + class BasicStrTest(unittest.TestCase): def test_basics(self): s = "abc123" @@ -725,6 +924,40 @@ (chars, size) = codecs.getdecoder(encoding)(bytes) self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding)) +class CharmapTest(unittest.TestCase): + def test_decode_with_string_map(self): + self.assertEquals( + 
codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"), + (u"abc", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"), + (u"ab\ufffd", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"), + (u"ab\ufffd", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"), + (u"ab", 3) + ) + + self.assertEquals( + codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"), + (u"ab", 3) + ) + + allbytes = "".join(chr(i) for i in xrange(256)) + self.assertEquals( + codecs.charmap_decode(allbytes, "ignore", u""), + (u"", len(allbytes)) + ) + + def test_main(): test_support.run_unittest( UTF16Test, @@ -734,12 +967,15 @@ EscapeDecodeTest, RecodingTest, PunycodeTest, + UnicodeInternalTest, NameprepTest, CodecTest, CodecsModuleTest, StreamReaderTest, + Str2StrTest, BasicUnicodeTest, - BasicStrTest + BasicStrTest, + CharmapTest ) Index: test_compiler.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_compiler.py,v retrieving revision 1.10.4.1 retrieving revision 1.10.4.2 diff -u -d -r1.10.4.1 -r1.10.4.2 --- test_compiler.py 7 Jan 2005 06:58:50 -0000 1.10.4.1 +++ test_compiler.py 16 Oct 2005 05:24:00 -0000 1.10.4.2 @@ -1,4 +1,5 @@ import compiler +from compiler.ast import flatten import os import test.test_support import unittest @@ -33,6 +34,9 @@ else: compiler.compile(buf, basename, "exec") + def testNewClassSyntax(self): + compiler.compile("class foo():pass\n\n","","exec") + def testLineNo(self): # Test that all nodes except Module have a correct lineno attribute. 
filename = __file__ @@ -57,6 +61,10 @@ for child in node.getChildNodes(): self.check_lineno(child) + def testFlatten(self): + self.assertEquals(flatten([1, [2]]), [1, 2]) + self.assertEquals(flatten((1, (2,))), [1, 2]) + NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard) ############################################################################### Index: test_complex.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_complex.py,v retrieving revision 1.8.2.2 retrieving revision 1.8.2.3 diff -u -d -r1.8.2.2 -r1.8.2.3 --- test_complex.py 7 Jan 2005 06:58:50 -0000 1.8.2.2 +++ test_complex.py 16 Oct 2005 05:24:00 -0000 1.8.2.3 @@ -273,6 +273,28 @@ self.assertAlmostEqual(complex(real=float2(17.), imag=float2(23.)), 17+23j) self.assertRaises(TypeError, complex, float2(None)) + class complex0(complex): + """Test usage of __complex__() when inheriting from 'complex'""" + def __complex__(self): + return 42j + + class complex1(complex): + """Test usage of __complex__() with a __new__() method""" + def __new__(self, value=0j): + return complex.__new__(self, 2*value) + def __complex__(self): + return self + + class complex2(complex): + """Make sure that __complex__() calls fail if anything other than a + complex is returned""" + def __complex__(self): + return None + + self.assertAlmostEqual(complex(complex0(1j)), 42j) + self.assertAlmostEqual(complex(complex1(1j)), 2j) + self.assertRaises(TypeError, complex, complex2(1j)) + def test_hash(self): for x in xrange(-30, 30): self.assertEqual(hash(x), hash(complex(x, 0))) @@ -288,6 +310,8 @@ self.assertEqual(repr(1+6j), '(1+6j)') self.assertEqual(repr(1-6j), '(1-6j)') + self.assertNotEqual(repr(-(1+0j)), '(-1+-0j)') + def test_neg(self): self.assertEqual(-(1+6j), -1-6j) Index: test_cookie.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_cookie.py,v 
retrieving revision 1.11.14.2 retrieving revision 1.11.14.3 diff -u -d -r1.11.14.2 -r1.11.14.3 --- test_cookie.py 7 Jan 2005 06:58:50 -0000 1.11.14.2 +++ test_cookie.py 16 Oct 2005 05:24:00 -0000 1.11.14.3 @@ -12,17 +12,17 @@ cases = [ ('chips=ahoy; vienna=finger', {'chips':'ahoy', 'vienna':'finger'}), - ('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;";', + ('keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', {'keebler' : 'E=mc2; L="Loves"; fudge=\012;'}), # Check illegal cookies that have an '=' char in an unquoted value - ('keebler=E=mc2;', {'keebler' : 'E=mc2'}) + ('keebler=E=mc2', {'keebler' : 'E=mc2'}) ] for data, dict in cases: C = Cookie.SimpleCookie() ; C.load(data) print repr(C) - print str(C) + print C.output(sep='\n') for k, v in sorted(dict.iteritems()): print ' ', k, repr( C[k].value ), repr(v) verify(C[k].value == v) Index: test_cookielib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_cookielib.py,v retrieving revision 1.1.6.1 retrieving revision 1.1.6.2 diff -u -d -r1.1.6.1 -r1.1.6.2 --- test_cookielib.py 7 Jan 2005 06:58:50 -0000 1.1.6.1 +++ test_cookielib.py 16 Oct 2005 05:24:00 -0000 1.1.6.2 @@ -103,13 +103,23 @@ from cookielib import parse_ns_headers # quotes should be stripped - expected = [[('expires', 2209069412L), ('version', '0')]] + expected = [[('foo', 'bar'), ('expires', 2209069412L), ('version', '0')]] for hdr in [ - 'expires=01 Jan 2040 22:23:32 GMT', - 'expires="01 Jan 2040 22:23:32 GMT"', + 'foo=bar; expires=01 Jan 2040 22:23:32 GMT', + 'foo=bar; expires="01 Jan 2040 22:23:32 GMT"', ]: self.assertEquals(parse_ns_headers([hdr]), expected) + def test_parse_ns_headers_special_names(self): + # names such as 'expires' are not special in first name=value pair + # of Set-Cookie: header + from cookielib import parse_ns_headers + + # Cookie with name 'expires' + hdr = 'expires=01 Jan 2040 22:23:32 GMT' + expected = [[("expires", "01 Jan 2040 22:23:32 GMT"), ("version", 
"0")]] + self.assertEquals(parse_ns_headers([hdr]), expected) + def test_join_header_words(self): from cookielib import join_header_words @@ -221,6 +231,24 @@ return cookie_hdr +class FileCookieJarTests(TestCase): + def test_lwp_valueless_cookie(self): + # cookies with no value should be saved and loaded consistently + from cookielib import LWPCookieJar + filename = test_support.TESTFN + c = LWPCookieJar() + interact_netscape(c, "http://www.acme.com/", 'boo') + self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) + try: + c.save(filename, ignore_discard=True) + c = LWPCookieJar() + c.load(filename, ignore_discard=True) + finally: + try: os.unlink(filename) + except OSError: pass + self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) + + class CookieTests(TestCase): # XXX # Get rid of string comparisons where not actually testing str / repr. @@ -370,6 +398,19 @@ self.assert_(foo.expires is None) self.assert_(spam.expires is None) + def test_ns_parser_special_names(self): + # names such as 'expires' are not special in first name=value pair + # of Set-Cookie: header + from cookielib import CookieJar + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/", 'expires=eggs') + interact_netscape(c, "http://www.acme.com/", 'version=eggs; spam=eggs') + + cookies = c._cookies["www.acme.com"]["/"] + self.assert_('expires' in cookies) + self.assert_('version' in cookies) + def test_expires(self): from cookielib import time2netscape, CookieJar @@ -1613,6 +1654,7 @@ DateTimeTests, HeaderTests, CookieTests, + FileCookieJarTests, LWPCookieTests, ) Index: test_csv.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_csv.py,v retrieving revision 1.5.4.2 retrieving revision 1.5.4.3 diff -u -d -r1.5.4.2 -r1.5.4.3 --- test_csv.py 7 Jan 2005 06:58:50 -0000 1.5.4.2 +++ test_csv.py 16 Oct 2005 05:24:00 -0000 1.5.4.3 @@ -17,44 +17,104 @@ from the high level interface. 
Further tests of this nature are done in TestDialectRegistry. """ + def _test_arg_valid(self, ctor, arg): + self.assertRaises(TypeError, ctor) + self.assertRaises(TypeError, ctor, None) + self.assertRaises(TypeError, ctor, arg, bad_attr = 0) + self.assertRaises(TypeError, ctor, arg, delimiter = 0) + self.assertRaises(TypeError, ctor, arg, delimiter = 'XX') + self.assertRaises(csv.Error, ctor, arg, 'foo') + self.assertRaises(TypeError, ctor, arg, delimiter=None) + self.assertRaises(TypeError, ctor, arg, delimiter=1) + self.assertRaises(TypeError, ctor, arg, quotechar=1) + self.assertRaises(TypeError, ctor, arg, lineterminator=None) + self.assertRaises(TypeError, ctor, arg, lineterminator=1) + self.assertRaises(TypeError, ctor, arg, quoting=None) + self.assertRaises(TypeError, ctor, arg, + quoting=csv.QUOTE_ALL, quotechar='') + self.assertRaises(TypeError, ctor, arg, + quoting=csv.QUOTE_ALL, quotechar=None) + def test_reader_arg_valid(self): - self.assertRaises(TypeError, csv.reader) - self.assertRaises(TypeError, csv.reader, None) - self.assertRaises(AttributeError, csv.reader, [], bad_attr = 0) - self.assertRaises(csv.Error, csv.reader, [], 'foo') - class BadClass: - def __init__(self): - raise IOError - self.assertRaises(IOError, csv.reader, [], BadClass) - self.assertRaises(TypeError, csv.reader, [], None) - class BadDialect: - bad_attr = 0 - self.assertRaises(AttributeError, csv.reader, [], BadDialect) + self._test_arg_valid(csv.reader, []) def test_writer_arg_valid(self): - self.assertRaises(TypeError, csv.writer) - self.assertRaises(TypeError, csv.writer, None) - self.assertRaises(AttributeError, csv.writer, StringIO(), bad_attr = 0) + self._test_arg_valid(csv.writer, StringIO()) - def _test_attrs(self, obj): + def _test_default_attrs(self, ctor, *args): + obj = ctor(*args) + # Check defaults self.assertEqual(obj.dialect.delimiter, ',') - obj.dialect.delimiter = '\t' - self.assertEqual(obj.dialect.delimiter, '\t') - self.assertRaises(TypeError, delattr, 
obj.dialect, 'delimiter') - self.assertRaises(TypeError, setattr, obj.dialect, - 'lineterminator', None) - obj.dialect.escapechar = None + self.assertEqual(obj.dialect.doublequote, True) self.assertEqual(obj.dialect.escapechar, None) - self.assertRaises(TypeError, delattr, obj.dialect, 'quoting') - self.assertRaises(TypeError, setattr, obj.dialect, 'quoting', None) - obj.dialect.quoting = csv.QUOTE_MINIMAL + self.assertEqual(obj.dialect.lineterminator, "\r\n") + self.assertEqual(obj.dialect.quotechar, '"') self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL) + self.assertEqual(obj.dialect.skipinitialspace, False) + self.assertEqual(obj.dialect.strict, False) + # Try deleting or changing attributes (they are read-only) + self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter') + self.assertRaises(TypeError, setattr, obj.dialect, 'delimiter', ':') + self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting') + self.assertRaises(AttributeError, setattr, obj.dialect, + 'quoting', None) def test_reader_attrs(self): - self._test_attrs(csv.reader([])) + self._test_default_attrs(csv.reader, []) def test_writer_attrs(self): - self._test_attrs(csv.writer(StringIO())) + self._test_default_attrs(csv.writer, StringIO()) + + def _test_kw_attrs(self, ctor, *args): + # Now try with alternate options + kwargs = dict(delimiter=':', doublequote=False, escapechar='\\', + lineterminator='\r', quotechar='*', + quoting=csv.QUOTE_NONE, skipinitialspace=True, + strict=True) + obj = ctor(*args, **kwargs) + self.assertEqual(obj.dialect.delimiter, ':') + self.assertEqual(obj.dialect.doublequote, False) + self.assertEqual(obj.dialect.escapechar, '\\') + self.assertEqual(obj.dialect.lineterminator, "\r") + self.assertEqual(obj.dialect.quotechar, '*') + self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE) + self.assertEqual(obj.dialect.skipinitialspace, True) + self.assertEqual(obj.dialect.strict, True) + + def test_reader_kw_attrs(self): + self._test_kw_attrs(csv.reader, []) 
+ + def test_writer_kw_attrs(self): + self._test_kw_attrs(csv.writer, StringIO()) + + def _test_dialect_attrs(self, ctor, *args): + # Now try with dialect-derived options + class dialect: + delimiter='-' + doublequote=False + escapechar='^' + lineterminator='$' + quotechar='#' + quoting=csv.QUOTE_ALL + skipinitialspace=True + strict=False + args = args + (dialect,) + obj = ctor(*args) + self.assertEqual(obj.dialect.delimiter, '-') + self.assertEqual(obj.dialect.doublequote, False) + self.assertEqual(obj.dialect.escapechar, '^') + self.assertEqual(obj.dialect.lineterminator, "$") + self.assertEqual(obj.dialect.quotechar, '#') + self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL) + self.assertEqual(obj.dialect.skipinitialspace, True) + self.assertEqual(obj.dialect.strict, False) + + def test_reader_dialect_attrs(self): + self._test_dialect_attrs(csv.reader, []) + + def test_writer_dialect_attrs(self): + self._test_dialect_attrs(csv.writer, StringIO()) + def _write_test(self, fields, expect, **kwargs): fd, name = tempfile.mkstemp() @@ -95,25 +155,35 @@ (bigstring, bigstring)) def test_write_quoting(self): - self._write_test(['a','1','p,q'], 'a,1,"p,q"') + self._write_test(['a',1,'p,q'], 'a,1,"p,q"') self.assertRaises(csv.Error, self._write_test, - ['a','1','p,q'], 'a,1,"p,q"', + ['a',1,'p,q'], 'a,1,p,q', quoting = csv.QUOTE_NONE) - self._write_test(['a','1','p,q'], 'a,1,"p,q"', + self._write_test(['a',1,'p,q'], 'a,1,"p,q"', quoting = csv.QUOTE_MINIMAL) - self._write_test(['a','1','p,q'], '"a",1,"p,q"', + self._write_test(['a',1,'p,q'], '"a",1,"p,q"', quoting = csv.QUOTE_NONNUMERIC) - self._write_test(['a','1','p,q'], '"a","1","p,q"', + self._write_test(['a',1,'p,q'], '"a","1","p,q"', quoting = csv.QUOTE_ALL) def test_write_escape(self): - self._write_test(['a','1','p,q'], 'a,1,"p,q"', + self._write_test(['a',1,'p,q'], 'a,1,"p,q"', escapechar='\\') -# FAILED - needs to be fixed [am]: -# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"', -# escapechar='\\', 
doublequote = 0) - self._write_test(['a','1','p,q'], 'a,1,p\\,q', + self.assertRaises(csv.Error, + self._write_test, + ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""', + escapechar=None, doublequote=False) + self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""', + escapechar='\\', doublequote = False) + self._write_test(['"'], '""""', + escapechar='\\', quoting = csv.QUOTE_MINIMAL) + self._write_test(['"'], '\\"', + escapechar='\\', quoting = csv.QUOTE_MINIMAL, + doublequote = False) + self._write_test(['"'], '\\"', + escapechar='\\', quoting = csv.QUOTE_NONE) + self._write_test(['a',1,'p,q'], 'a,1,p\\,q', escapechar='\\', quoting = csv.QUOTE_NONE) def test_writerows(self): @@ -159,18 +229,56 @@ self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], []) def test_read_escape(self): - self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar='\\') + self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\') self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\') self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\') - self._read_test(['a,"b,\\c"'], [['a', 'b,\\c']], escapechar='\\') + self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\') self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\') self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\') + def test_read_quoting(self): + self._read_test(['1,",3,",5'], [['1', ',3,', '5']]) + self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']], + quotechar=None, escapechar='\\') + self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']], + quoting=csv.QUOTE_NONE, escapechar='\\') + # will this fail where locale uses comma for decimals? 
+ self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]], + quoting=csv.QUOTE_NONNUMERIC) + self.assertRaises(ValueError, self._read_test, + ['abc,3'], [[]], + quoting=csv.QUOTE_NONNUMERIC) + def test_read_bigfield(self): - # This exercises the buffer realloc functionality - bigstring = 'X' * 50000 - bigline = '%s,%s' % (bigstring, bigstring) - self._read_test([bigline], [[bigstring, bigstring]]) + # This exercises the buffer realloc functionality and field size + # limits. + limit = csv.field_size_limit() + try: + size = 50000 + bigstring = 'X' * size + bigline = '%s,%s' % (bigstring, bigstring) + self._read_test([bigline], [[bigstring, bigstring]]) + csv.field_size_limit(size) + self._read_test([bigline], [[bigstring, bigstring]]) + self.assertEqual(csv.field_size_limit(), size) + csv.field_size_limit(size-1) + self.assertRaises(csv.Error, self._read_test, [bigline], []) + self.assertRaises(TypeError, csv.field_size_limit, None) + self.assertRaises(TypeError, csv.field_size_limit, 1, None) + finally: + csv.field_size_limit(limit) + + def test_read_linenum(self): + r = csv.reader(['line,1', 'line,2', 'line,3']) + self.assertEqual(r.line_num, 0) + r.next() + self.assertEqual(r.line_num, 1) + r.next() + self.assertEqual(r.line_num, 2) + r.next() + self.assertEqual(r.line_num, 3) + self.assertRaises(StopIteration, r.next) + self.assertEqual(r.line_num, 3) class TestDialectRegistry(unittest.TestCase): def test_registry_badargs(self): @@ -183,11 +291,12 @@ self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch") self.assertRaises(TypeError, csv.register_dialect, None) self.assertRaises(TypeError, csv.register_dialect, None, None) - self.assertRaises(TypeError, csv.register_dialect, "nonesuch", None) - class bogus: - def __init__(self): - raise KeyError - self.assertRaises(KeyError, csv.register_dialect, "nonesuch", bogus) + self.assertRaises(TypeError, csv.register_dialect, "nonesuch", 0, 0) + self.assertRaises(TypeError, csv.register_dialect, "nonesuch", + 
badargument=None) + self.assertRaises(TypeError, csv.register_dialect, "nonesuch", + quoting=None) + self.assertRaises(TypeError, csv.register_dialect, []) def test_registry(self): class myexceltsv(csv.excel): @@ -197,13 +306,22 @@ expected_dialects.sort() csv.register_dialect(name, myexceltsv) try: - self.failUnless(isinstance(csv.get_dialect(name), myexceltsv)) + self.failUnless(csv.get_dialect(name).delimiter, '\t') got_dialects = csv.list_dialects() got_dialects.sort() self.assertEqual(expected_dialects, got_dialects) finally: csv.unregister_dialect(name) + def test_register_kwargs(self): + name = 'fedcba' + csv.register_dialect(name, delimiter=';') + try: + self.failUnless(csv.get_dialect(name).delimiter, '\t') + self.failUnless(list(csv.reader('X;Y;Z', name)), ['X', 'Y', 'Z']) + finally: + csv.unregister_dialect(name) + def test_incomplete_dialect(self): class myexceltsv(csv.Dialect): delimiter = "\t" @@ -297,7 +415,7 @@ def test_bad_dialect(self): # Unknown parameter - self.assertRaises(AttributeError, csv.reader, [], bad_attr = 0) + self.assertRaises(TypeError, csv.reader, [], bad_attr = 0) # Bad values self.assertRaises(TypeError, csv.reader, [], delimiter = None) self.assertRaises(TypeError, csv.reader, [], quoting = -1) @@ -661,10 +779,6 @@ mydialect.quoting = None self.assertRaises(csv.Error, mydialect) - mydialect.quoting = csv.QUOTE_NONE - mydialect.escapechar = None - self.assertRaises(csv.Error, mydialect) - mydialect.doublequote = True mydialect.quoting = csv.QUOTE_ALL mydialect.quotechar = '"' Index: test_curses.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_curses.py,v retrieving revision 1.1.10.2 retrieving revision 1.1.10.3 diff -u -d -r1.1.10.2 -r1.1.10.3 --- test_curses.py 7 Jan 2005 06:58:50 -0000 1.1.10.2 +++ test_curses.py 16 Oct 2005 05:24:00 -0000 1.1.10.3 @@ -107,6 +107,7 @@ stdscr.scroll(2) stdscr.scroll(-3) + stdscr.move(12, 2) stdscr.setscrreg(10,15) win3 
= stdscr.subwin(10,10) win3 = stdscr.subwin(10,10, 5,5) @@ -179,7 +180,7 @@ curses.init_pair(2, 1,1) curses.color_content(1) curses.color_pair(2) - curses.pair_content(curses.COLOR_PAIRS) + curses.pair_content(curses.COLOR_PAIRS - 1) curses.pair_number(0) if hasattr(curses, 'use_default_colors'): Index: test_datetime.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_datetime.py,v retrieving revision 1.41.4.2 retrieving revision 1.41.4.3 diff -u -d -r1.41.4.2 -r1.41.4.3 --- test_datetime.py 7 Jan 2005 06:58:50 -0000 1.41.4.2 +++ test_datetime.py 16 Oct 2005 05:24:00 -0000 1.41.4.3 @@ -446,9 +446,9 @@ def test_subclass_timedelta(self): class T(timedelta): + @staticmethod def from_td(td): return T(td.days, td.seconds, td.microseconds) - from_td = staticmethod(from_td) def as_hours(self): sum = (self.days * 24 + @@ -1421,6 +1421,15 @@ # Else try again a few times. self.failUnless(abs(from_timestamp - from_now) <= tolerance) + def test_strptime(self): + import time + + string = '2004-12-01 13:02:47' + format = '%Y-%m-%d %H:%M:%S' + expected = self.theclass(*(time.strptime(string, format)[0:6])) + got = self.theclass.strptime(string, format) + self.assertEqual(expected, got) + def test_more_timetuple(self): # This tests fields beyond those tested by the TestDate.test_timetuple. t = self.theclass(2004, 12, 31, 6, 22, 33) Index: test_decimal.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_decimal.py,v retrieving revision 1.16.2.1 retrieving revision 1.16.2.2 diff -u -d -r1.16.2.1 -r1.16.2.2 --- test_decimal.py 7 Jan 2005 06:58:50 -0000 1.16.2.1 +++ test_decimal.py 16 Oct 2005 05:24:00 -0000 1.16.2.2 @@ -24,8 +24,6 @@ with the corresponding argument. 
""" -from __future__ import division - import unittest import glob import os, sys @@ -54,9 +52,9 @@ else: file = __file__ testdir = os.path.dirname(file) or os.curdir -dir = testdir + os.sep + TESTDATADIR + os.sep +directory = testdir + os.sep + TESTDATADIR + os.sep -skip_expected = not os.path.isdir(dir) +skip_expected = not os.path.isdir(directory) # Make sure it actually raises errors when not expected and caught in flags # Slower, since it runs some things several times. @@ -109,7 +107,6 @@ Changed for unittest. """ def setUp(self): - global dir self.context = Context() for key in DefaultContext.traps.keys(): DefaultContext.traps[key] = 1 @@ -302,11 +299,11 @@ # Dynamically build custom test definition for each file in the test # directory and add the definitions to the DecimalTest class. This # procedure insures that new files do not get skipped. -for filename in os.listdir(dir): +for filename in os.listdir(directory): if '.decTest' not in filename: continue head, tail = filename.split('.') - tester = lambda self, f=filename: self.eval_file(dir + f) + tester = lambda self, f=filename: self.eval_file(directory + f) setattr(DecimalTest, 'test_' + head, tester) del filename, head, tail, tester @@ -476,6 +473,52 @@ def test_implicit_from_Decimal(self): self.assertEqual(Decimal(5) + Decimal(45), Decimal(50)) + def test_rop(self): + # Allow other classes to be trained to interact with Decimals + class E: + def __divmod__(self, other): + return 'divmod ' + str(other) + def __rdivmod__(self, other): + return str(other) + ' rdivmod' + def __lt__(self, other): + return 'lt ' + str(other) + def __gt__(self, other): + return 'gt ' + str(other) + def __le__(self, other): + return 'le ' + str(other) + def __ge__(self, other): + return 'ge ' + str(other) + def __eq__(self, other): + return 'eq ' + str(other) + def __ne__(self, other): + return 'ne ' + str(other) + + self.assertEqual(divmod(E(), Decimal(10)), 'divmod 10') + self.assertEqual(divmod(Decimal(10), E()), '10 
rdivmod') + self.assertEqual(eval('Decimal(10) < E()'), 'gt 10') + self.assertEqual(eval('Decimal(10) > E()'), 'lt 10') + self.assertEqual(eval('Decimal(10) <= E()'), 'ge 10') + self.assertEqual(eval('Decimal(10) >= E()'), 'le 10') + self.assertEqual(eval('Decimal(10) == E()'), 'eq 10') + self.assertEqual(eval('Decimal(10) != E()'), 'ne 10') + + # insert operator methods and then exercise them + for sym, lop, rop in ( + ('+', '__add__', '__radd__'), + ('-', '__sub__', '__rsub__'), + ('*', '__mul__', '__rmul__'), + ('/', '__div__', '__rdiv__'), + ('%', '__mod__', '__rmod__'), + ('//', '__floordiv__', '__rfloordiv__'), + ('**', '__pow__', '__rpow__'), + ): + + setattr(E, lop, lambda self, other: 'str' + lop + str(other)) + setattr(E, rop, lambda self, other: str(other) + rop + 'str') + self.assertEqual(eval('E()' + sym + 'Decimal(10)'), + 'str' + lop + '10') + self.assertEqual(eval('Decimal(10)' + sym + 'E()'), + '10' + rop + 'str') class DecimalArithmeticOperatorsTest(unittest.TestCase): '''Unit tests for all arithmetic operators, binary and unary.''' @@ -811,6 +854,9 @@ hash(Decimal(23)) #the same hash that to an int self.assertEqual(hash(Decimal(23)), hash(23)) + self.assertRaises(TypeError, hash, Decimal('NaN')) + self.assert_(hash(Decimal('Inf'))) + self.assert_(hash(Decimal('-Inf'))) def test_min_and_max_methods(self): Index: test_deque.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_deque.py,v retrieving revision 1.18.4.1 retrieving revision 1.18.4.2 diff -u -d -r1.18.4.1 -r1.18.4.2 --- test_deque.py 7 Jan 2005 06:58:51 -0000 1.18.4.1 +++ test_deque.py 16 Oct 2005 05:24:00 -0000 1.18.4.2 @@ -1,6 +1,6 @@ from collections import deque import unittest -from test import test_support +from test import test_support, seq_tests from weakref import proxy import copy import cPickle as pickle @@ -14,6 +14,18 @@ raise SyntaxError yield 1 +class BadCmp: + def __eq__(self, other): + raise 
RuntimeError + +class MutateCmp: + def __init__(self, deque, result): + self.deque = deque + self.result = result + def __eq__(self, other): + self.deque.clear() + return self.result + class TestBasic(unittest.TestCase): def test_basics(self): @@ -197,6 +209,30 @@ d.clear() # clear an emtpy deque self.assertEqual(list(d), []) + def test_remove(self): + d = deque('abcdefghcij') + d.remove('c') + self.assertEqual(d, deque('abdefghcij')) + d.remove('c') + self.assertEqual(d, deque('abdefghij')) + self.assertRaises(ValueError, d.remove, 'c') + self.assertEqual(d, deque('abdefghij')) + + # Handle comparison errors + d = deque(['a', 'b', BadCmp(), 'c']) + e = deque(d) + self.assertRaises(RuntimeError, d.remove, 'c') + for x, y in zip(d, e): + # verify that original order and values are retained. + self.assert_(x is y) + + # Handle evil mutator + for match in (True, False): + d = deque(['ab']) + d.extend([MutateCmp(d, match), 'c']) + self.assertRaises(IndexError, d.remove, 'c') + self.assertEqual(d, deque()) + def test_repr(self): d = deque(xrange(200)) e = eval(repr(d)) @@ -342,93 +378,17 @@ d.append(1) gc.collect() -def R(seqn): - 'Regular generator' - for i in seqn: - yield i - -class G: - 'Sequence using __getitem__' - def __init__(self, seqn): - self.seqn = seqn - def __getitem__(self, i): - return self.seqn[i] - -class I: - 'Sequence using iterator protocol' - def __init__(self, seqn): - self.seqn = seqn - self.i = 0 - def __iter__(self): - return self - def next(self): - if self.i >= len(self.seqn): raise StopIteration - v = self.seqn[self.i] - self.i += 1 - return v - -class Ig: - 'Sequence using iterator protocol defined with a generator' - def __init__(self, seqn): - self.seqn = seqn - self.i = 0 - def __iter__(self): - for val in self.seqn: - yield val - -class X: - 'Missing __getitem__ and __iter__' - def __init__(self, seqn): - self.seqn = seqn - self.i = 0 - def next(self): - if self.i >= len(self.seqn): raise StopIteration - v = self.seqn[self.i] - self.i 
+= 1 - return v - -class N: - 'Iterator missing next()' - def __init__(self, seqn): - self.seqn = seqn - self.i = 0 - def __iter__(self): - return self - -class E: - 'Test propagation of exceptions' - def __init__(self, seqn): - self.seqn = seqn - self.i = 0 - def __iter__(self): - return self - def next(self): - 3 // 0 - -class S: - 'Test immediate stop' - def __init__(self, seqn): - pass - def __iter__(self): - return self - def next(self): - raise StopIteration - -from itertools import chain, imap -def L(seqn): - 'Test multiple tiers of iterators' - return chain(imap(lambda x:x, R(Ig(G(seqn))))) - - class TestVariousIteratorArgs(unittest.TestCase): def test_constructor(self): for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)): - for g in (G, I, Ig, S, L, R): + for g in (seq_tests.Sequence, seq_tests.IterFunc, + seq_tests.IterGen, seq_tests.IterFuncStop, + seq_tests.itermulti, seq_tests.iterfunc): self.assertEqual(list(deque(g(s))), list(g(s))) - self.assertRaises(TypeError, deque, X(s)) - self.assertRaises(TypeError, deque, N(s)) - self.assertRaises(ZeroDivisionError, deque, E(s)) + self.assertRaises(TypeError, deque, seq_tests.IterNextOnly(s)) + self.assertRaises(TypeError, deque, seq_tests.IterNoNext(s)) + self.assertRaises(ZeroDivisionError, deque, seq_tests.IterGenExc(s)) def test_iter_with_altered_data(self): d = deque('abcdefg') Index: test_descr.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_descr.py,v retrieving revision 1.144.2.2 retrieving revision 1.144.2.3 diff -u -d -r1.144.2.2 -r1.144.2.3 --- test_descr.py 7 Jan 2005 06:58:51 -0000 1.144.2.2 +++ test_descr.py 16 Oct 2005 05:24:00 -0000 1.144.2.3 @@ -691,13 +691,13 @@ class _instance(object): pass class M2(object): + @staticmethod def __new__(cls, name, bases, dict): self = object.__new__(cls) self.name = name self.bases = bases self.dict = dict return self - __new__ = staticmethod(__new__) def 
__call__(self): it = _instance() # Early binding of methods @@ -2071,9 +2071,9 @@ aProp = property(lambda self: "foo") class Sub(Base): + @classmethod def test(klass): return super(Sub,klass).aProp - test = classmethod(test) veris(Sub.test(), Base.aProp) @@ -2712,7 +2712,7 @@ def cant(x, dict): try: x.__dict__ = dict - except TypeError: + except (AttributeError, TypeError): pass else: raise TestFailed, "shouldn't allow %r.__dict__ = %r" % (x, dict) @@ -3965,6 +3965,18 @@ import gc; gc.collect() vereq(hasattr(c, 'attr'), False) +def test_init(): + # SF 1155938 + class Foo(object): + def __init__(self): + return 10 + try: + Foo() + except TypeError: + pass + else: + raise TestFailed, "did not test __init__() for None return" + def test_main(): weakref_segfault() # Must be first, somehow @@ -4058,6 +4070,7 @@ carloverre() filefault() vicious_descriptor_nonsense() + test_init() if verbose: print "All OK" Index: test_descrtut.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_descrtut.py,v retrieving revision 1.13.2.2 retrieving revision 1.13.2.3 diff -u -d -r1.13.2.2 -r1.13.2.3 --- test_descrtut.py 7 Jan 2005 06:58:51 -0000 1.13.2.2 +++ test_descrtut.py 16 Oct 2005 05:24:00 -0000 1.13.2.3 @@ -246,9 +246,9 @@ >>> class C: ... + ... @staticmethod ... def foo(x, y): ... print "staticmethod", x, y - ... foo = staticmethod(foo) >>> C.foo(1, 2) staticmethod 1 2 @@ -260,9 +260,9 @@ implicit first argument that is the *class* for which they are invoked. >>> class C: + ... @classmethod ... def foo(cls, y): ... print "classmethod", cls, y - ... foo = classmethod(foo) >>> C.foo(1) classmethod test.test_descrtut.C 1 @@ -286,10 +286,10 @@ But notice this: >>> class E(C): + ... @classmethod ... def foo(cls, y): # override C.foo ... print "E.foo() called" ... C.foo(y) - ... 
foo = classmethod(foo) >>> E.foo(1) E.foo() called Index: test_doctest.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_doctest.py,v retrieving revision 1.4.14.3 retrieving revision 1.4.14.4 diff -u -d -r1.4.14.3 -r1.4.14.4 --- test_doctest.py 10 Oct 2005 15:50:41 -0000 1.4.14.3 +++ test_doctest.py 16 Oct 2005 05:24:00 -0000 1.4.14.4 @@ -1517,6 +1517,7 @@ ## 44 # # Yee ha! + >>> name = 'test.test_doctest.SampleNewStyleClass' >>> print doctest.testsource(test.test_doctest, name) @@ -1525,6 +1526,7 @@ ## 1 ## 2 ## 3 + >>> name = 'test.test_doctest.SampleClass.a_classmethod' >>> print doctest.testsource(test.test_doctest, name) @@ -1534,6 +1536,7 @@ print SampleClass(0).a_classmethod(10) # Expected: ## 12 + """ def test_debug(): r""" Index: test_doctest2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_doctest2.py,v retrieving revision 1.3.12.2 retrieving revision 1.3.12.3 diff -u -d -r1.3.12.2 -r1.3.12.3 --- test_doctest2.py 7 Jan 2005 06:58:52 -0000 1.3.12.2 +++ test_doctest2.py 16 Oct 2005 05:24:00 -0000 1.3.12.3 @@ -80,6 +80,7 @@ -12 """) + @staticmethod def statm(): """ A static method. @@ -91,8 +92,7 @@ """ return 666 - statm = staticmethod(statm) - + @classmethod def clsm(cls, val): """ A class method. 
@@ -104,8 +104,6 @@ """ return val - clsm = classmethod(clsm) - def test_main(): from test import test_doctest2 EXPECTED = 19 Index: test_dumbdbm.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_dumbdbm.py,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- test_dumbdbm.py 7 Jan 2005 06:58:52 -0000 1.6.2.2 +++ test_dumbdbm.py 16 Oct 2005 05:24:00 -0000 1.6.2.3 @@ -74,6 +74,24 @@ self.assertEqual(f['1'], 'hello2') f.close() + def test_line_endings(self): + # test for bug #1172763: dumbdbm would die if the line endings + # weren't what was expected. + f = dumbdbm.open(_fname) + f['1'] = 'hello' + f['2'] = 'hello2' + f.close() + + # Mangle the file by adding \r before each newline + data = open(_fname + '.dir').read() + data = data.replace('\n', '\r\n') + open(_fname + '.dir', 'wb').write(data) + + f = dumbdbm.open(_fname) + self.assertEqual(f['1'], 'hello') + self.assertEqual(f['2'], 'hello2') + + def read_helper(self, f): keys = self.keys_helper(f) for key in self._dict: Index: test_enumerate.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_enumerate.py,v retrieving revision 1.2.2.2 retrieving revision 1.2.2.3 diff -u -d -r1.2.2.2 -r1.2.2.3 --- test_enumerate.py 7 Jan 2005 06:58:52 -0000 1.2.2.2 +++ test_enumerate.py 16 Oct 2005 05:24:00 -0000 1.2.2.3 @@ -1,4 +1,5 @@ import unittest +import sys from test import test_support @@ -143,6 +144,7 @@ def test_len(self): # This is an implementation detail, not an interface requirement + from test.test_iterlen import len for s in ('hello', tuple('hello'), list('hello'), xrange(5)): self.assertEqual(len(reversed(s)), len(s)) r = reversed(s) @@ -175,6 +177,25 @@ self.assertRaises(TypeError, reversed) self.assertRaises(TypeError, reversed, [], 'extra') + def test_bug1229429(self): + # this bug was never in reversed, it was in + # 
PyObject_CallMethod, and reversed_new calls that sometimes. + if not hasattr(sys, "getrefcount"): + return + def f(): + pass + r = f.__reversed__ = object() + rc = sys.getrefcount(r) + for i in range(10): + try: + reversed(f) + except TypeError: + pass + else: + self.fail("non-callable __reversed__ didn't raise!") + self.assertEqual(rc, sys.getrefcount(r)) + + def test_main(verbose=None): testclasses = (EnumerateTestCase, SubclassTestCase, TestEmpty, TestBig, TestReversed) Index: test_fcntl.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_fcntl.py,v retrieving revision 1.23.2.2 retrieving revision 1.23.2.3 diff -u -d -r1.23.2.2 -r1.23.2.3 --- test_fcntl.py 7 Jan 2005 06:58:52 -0000 1.23.2.2 +++ test_fcntl.py 16 Oct 2005 05:24:00 -0000 1.23.2.3 @@ -22,9 +22,16 @@ if sys.platform in ('netbsd1', 'Darwin1.2', 'darwin', 'freebsd2', 'freebsd3', 'freebsd4', 'freebsd5', 'freebsd6', + 'freebsd7', 'bsdos2', 'bsdos3', 'bsdos4', 'openbsd', 'openbsd2', 'openbsd3'): - lockdata = struct.pack('lxxxxlxxxxlhh', 0, 0, 0, fcntl.F_WRLCK, 0) + if struct.calcsize('l') == 8: + off_t = 'l' + pid_t = 'i' + else: + off_t = 'lxxxx' + pid_t = 'l' + lockdata = struct.pack(off_t+off_t+pid_t+'hh', 0, 0, 0, fcntl.F_WRLCK, 0) elif sys.platform in ['aix3', 'aix4', 'hp-uxB', 'unixware7']: lockdata = struct.pack('hhlllii', fcntl.F_WRLCK, 0, 0, 0, 0, 0, 0) elif sys.platform in ['os2emx']: @@ -39,7 +46,7 @@ f = open(filename, 'w') rv = fcntl.fcntl(f.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) if verbose: - print 'Status from fnctl with O_NONBLOCK: ', rv + print 'Status from fcntl with O_NONBLOCK: ', rv if sys.platform not in ['os2emx']: rv = fcntl.fcntl(f.fileno(), fcntl.F_SETLKW, lockdata) Index: test_file.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_file.py,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- 
test_file.py 7 Jan 2005 06:58:52 -0000 1.9.2.2 +++ test_file.py 16 Oct 2005 05:24:00 -0000 1.9.2.3 @@ -34,12 +34,22 @@ for attr in 'name', 'mode', 'closed': try: setattr(f, attr, 'oops') - except TypeError: + except (AttributeError, TypeError): pass else: - raise TestFailed('expected TypeError setting file attr %r' % attr) + raise TestFailed('expected exception setting file attr %r' % attr) f.close() +# check invalid mode strings +for mode in ("", "aU", "wU+"): + try: + f = file(TESTFN, mode) + except ValueError: + pass + else: + f.close() + raise TestFailed('%r is an invalid file mode' % mode) + # verify writelines with instance sequence l = UserList(['1', '2']) f = open(TESTFN, 'wb') Index: test_funcattrs.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_funcattrs.py,v retrieving revision 1.11.10.2 retrieving revision 1.11.10.3 diff -u -d -r1.11.10.2 -r1.11.10.3 --- test_funcattrs.py 7 Jan 2005 06:58:52 -0000 1.11.10.2 +++ test_funcattrs.py 16 Oct 2005 05:24:00 -0000 1.11.10.3 @@ -276,6 +276,9 @@ verify(f.func_name == "h") cantset(f, "func_globals", 1) cantset(f, "__name__", 1) + # test that you can access func.__name__ in restricted mode + s = """def f(): pass\nf.__name__""" + exec s in {'__builtins__':{}} def test_func_code(): Index: test_generators.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_generators.py,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- test_generators.py 7 Jan 2005 06:58:52 -0000 1.34.2.2 +++ test_generators.py 16 Oct 2005 05:24:00 -0000 1.34.2.3 @@ -382,7 +382,7 @@ >>> type(i) >>> [s for s in dir(i) if not s.startswith('_')] -['gi_frame', 'gi_running', 'next'] +['close', 'gi_frame', 'gi_running', 'next', 'send', 'throw'] >>> print i.next.__doc__ x.next() -> the next value, or raise StopIteration >>> iter(i) is i @@ -421,6 +421,7 @@ ... 
self.name = name ... self.parent = None ... self.generator = self.generate() +... self.close = self.generator.close ... ... def generate(self): ... while not self.parent: @@ -482,6 +483,9 @@ A->A B->G C->A D->G E->G F->A G->G H->G I->A J->G K->A L->A M->G merged A into G A->G B->G C->G D->G E->G F->G G->G H->G I->G J->G K->G L->G M->G + +>>> for s in sets: s.close() # break cycles + """ # Emacs turd ' @@ -589,6 +593,7 @@ ... def __init__(self, g): ... self.sofar = [] ... self.fetch = g.next +... self.close = g.close ... ... def __getitem__(self, i): ... sofar, fetch = self.sofar, self.fetch @@ -619,6 +624,7 @@ [200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375, 384] [400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675] +>>> m235.close() Ye olde Fibonacci generator, LazyList style. @@ -642,6 +648,85 @@ >>> fib = LazyList(fibgen(1, 2)) >>> firstn(iter(fib), 17) [1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584] +>>> fib.close() + + +Running after your tail with itertools.tee (new in version 2.4) + +The algorithms "m235" (Hamming) and Fibonacci presented above are both +examples of a whole family of FP (functional programming) algorithms +where a function produces and returns a list while the production algorithm +suppose the list as already produced by recursively calling itself. +For these algorithms to work, they must: + +- produce at least a first element without presupposing the existence of + the rest of the list +- produce their elements in a lazy manner + +To work efficiently, the beginning of the list must not be recomputed over +and over again. This is ensured in most FP languages as a built-in feature. +In python, we have to explicitly maintain a list of already computed results +and abandon genuine recursivity. + +This is what had been attempted above with the LazyList class. One problem +with that class is that it keeps a list of all of the generated results and +therefore continually grows. 
This partially defeats the goal of the generator +concept, viz. produce the results only as needed instead of producing them +all and thereby wasting memory. + +Thanks to itertools.tee, it is now clear "how to get the internal uses of +m235 to share a single generator". + +>>> from itertools import tee +>>> def m235(): +... def _m235(): +... yield 1 +... for n in merge(times(2, m2), +... merge(times(3, m3), +... times(5, m5))): +... yield n +... m2, m3, m5, mRes = tee(_m235(), 4) +... return mRes + +>>> it = m235() +>>> for i in range(5): +... print firstn(it, 15) +[1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24] +[25, 27, 30, 32, 36, 40, 45, 48, 50, 54, 60, 64, 72, 75, 80] +[81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160, 162, 180, 192] +[200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375, 384] +[400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675] + +The "tee" function does just what we want. It internally keeps a generated +result for as long as it has not been "consumed" from all of the duplicated +iterators, whereupon it is deleted. You can therefore print the hamming +sequence during hours without increasing memory usage, or very little. + +The beauty of it is that recursive running after their tail FP algorithms +are quite straightforwardly expressed with this Python idiom. + + +Ye olde Fibonacci generator, tee style. + +>>> def fib(): +... +... def _isum(g, h): +... while 1: +... yield g.next() + h.next() +... +... def _fib(): +... yield 1 +... yield 2 +... fibTail.next() # throw first away +... for res in _isum(fibHead, fibTail): +... yield res +... +... fibHead, fibTail, fibRes = tee(_fib(), 3) +... return fibRes + +>>> firstn(fib(), 17) +[1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584] + """ # syntax_tests mostly provokes SyntaxErrors. Also fiddling with #if 0 @@ -672,7 +757,7 @@ .. 
SyntaxError: 'return' with argument inside generator (, line 3) -This one is fine: +These are fine: >>> def f(): ... yield 1 @@ -683,25 +768,17 @@ ... yield 1 ... finally: ... pass -Traceback (most recent call last): - .. -SyntaxError: 'yield' not allowed in a 'try' block with a 'finally' clause (, line 3) >>> def f(): ... try: ... try: ... 1//0 ... except ZeroDivisionError: -... yield 666 # bad because *outer* try has finally +... yield 666 ... except: ... pass ... finally: ... pass -Traceback (most recent call last): - ... -SyntaxError: 'yield' not allowed in a 'try' block with a 'finally' clause (, line 6) - -But this is fine: >>> def f(): ... try: @@ -722,14 +799,16 @@ >>> def f(): ... yield -Traceback (most recent call last): -SyntaxError: invalid syntax +>>> type(f()) + + >>> def f(): ... if 0: ... yield -Traceback (most recent call last): -SyntaxError: invalid syntax +>>> type(f()) + + >>> def f(): ... if 0: @@ -805,7 +884,7 @@ ... if 0: ... yield 2 # because it's a generator Traceback (most recent call last): -SyntaxError: 'return' with argument inside generator (, line 8) +SyntaxError: 'return' with argument inside generator (, line 8) This one caused a crash (see SF bug 567538): @@ -1383,6 +1462,250 @@ """ +coroutine_tests = """\ +Sending a value into a started generator: + +>>> def f(): +... print (yield 1) +... yield 2 +>>> g = f() +>>> g.next() +1 +>>> g.send(42) +42 +2 + +Sending a value into a new generator produces a TypeError: + +>>> f().send("foo") +Traceback (most recent call last): +... 
+TypeError: can't send non-None value to a just-started generator + + +Yield by itself yields None: + +>>> def f(): yield +>>> list(f()) +[None] + + + +An obscene abuse of a yield expression within a generator expression: + +>>> list((yield 21) for i in range(4)) +[21, None, 21, None, 21, None, 21, None] + +And a more sane, but still weird usage: + +>>> def f(): list(i for i in [(yield 26)]) +>>> type(f()) + + + +Check some syntax errors for yield expressions: + +>>> f=lambda: (yield 1),(yield 2) +Traceback (most recent call last): + ... +SyntaxError: 'yield' outside function (, line 1) + +>>> def f(): return lambda x=(yield): 1 +Traceback (most recent call last): + ... +SyntaxError: 'return' with argument inside generator (, line 1) + +>>> def f(): x = yield = y +Traceback (most recent call last): + ... +SyntaxError: assignment to yield expression not possible (, line 1) + + +Now check some throw() conditions: + +>>> def f(): +... while True: +... try: +... print (yield) +... except ValueError,v: +... print "caught ValueError (%s)" % (v), +>>> import sys +>>> g = f() +>>> g.next() + +>>> g.throw(ValueError) # type only +caught ValueError () + +>>> g.throw(ValueError("xyz")) # value only +caught ValueError (xyz) + +>>> g.throw(ValueError, ValueError(1)) # value+matching type +caught ValueError (1) + +>>> g.throw(ValueError, TypeError(1)) # mismatched type, rewrapped +caught ValueError (1) + +>>> g.throw(ValueError(1), "foo") # bad args +Traceback (most recent call last): + ... +TypeError: instance exception may not have a separate value + +>>> g.throw(ValueError, "foo", 23) # bad args +Traceback (most recent call last): + ... +TypeError: throw() third argument must be a traceback object + +>>> def throw(g,exc): +... try: +... raise exc +... except: +... 
g.throw(*sys.exc_info()) +>>> throw(g,ValueError) # do it with traceback included +caught ValueError () + +>>> g.send(1) +1 + +>>> throw(g,TypeError) # terminate the generator +Traceback (most recent call last): + ... +TypeError + +>>> print g.gi_frame +None + +>>> g.send(2) +Traceback (most recent call last): + ... +StopIteration + +>>> g.throw(ValueError,6) # throw on closed generator +Traceback (most recent call last): + ... +ValueError: 6 + +>>> f().throw(ValueError,7) # throw on just-opened generator +Traceback (most recent call last): + ... +ValueError: 7 + + +Now let's try closing a generator: + +>>> def f(): +... try: yield +... except GeneratorExit: +... print "exiting" + +>>> g = f() +>>> g.next() +>>> g.close() +exiting +>>> g.close() # should be no-op now + +>>> f().close() # close on just-opened generator should be fine + +>>> def f(): yield # an even simpler generator +>>> f().close() # close before opening +>>> g = f() +>>> g.next() +>>> g.close() # close normally + +And finalization: + +>>> def f(): +... try: yield +... finally: +... print "exiting" + +>>> g = f() +>>> g.next() +>>> del g +exiting + + +Now let's try some ill-behaved generators: + +>>> def f(): +... try: yield +... except GeneratorExit: +... yield "foo!" +>>> g = f() +>>> g.next() +>>> g.close() +Traceback (most recent call last): + ... +RuntimeError: generator ignored GeneratorExit +>>> g.close() + + +Our ill-behaved code should be invoked during GC: + +>>> import sys, StringIO +>>> old, sys.stderr = sys.stderr, StringIO.StringIO() +>>> g = f() +>>> g.next() +>>> del g +>>> sys.stderr.getvalue().startswith( +... "Exception exceptions.RuntimeError: 'generator ignored GeneratorExit' in " +... ) +True +>>> sys.stderr = old + + +And errors thrown during closing should propagate: + +>>> def f(): +... try: yield +... except GeneratorExit: +... raise TypeError("fie!") +>>> g = f() +>>> g.next() +>>> g.close() +Traceback (most recent call last): + ... +TypeError: fie! 
+ + +Ensure that various yield expression constructs make their +enclosing function a generator: + +>>> def f(): x += yield +>>> type(f()) + + +>>> def f(): x = yield +>>> type(f()) + + +>>> def f(): lambda x=(yield): 1 +>>> type(f()) + + +>>> def f(): x=(i for i in (yield) if (yield)) +>>> type(f()) + + +>>> def f(d): d[(yield "a")] = d[(yield "b")] = 27 +>>> data = [1,2] +>>> g = f(data) +>>> type(g) + +>>> g.send(None) +'a' +>>> data +[1, 2] +>>> g.send(0) +'b' +>>> data +[27, 2] +>>> try: g.send(1) +... except StopIteration: pass +>>> data +[27, 27] + +""" + __test__ = {"tut": tutorial_tests, "pep": pep_tests, "email": email_tests, @@ -1390,6 +1713,7 @@ "syntax": syntax_tests, "conjoin": conjoin_tests, "weakref": weakref_tests, + "coroutine": coroutine_tests, } # Magic test name that regrtest.py invokes *after* importing this module. Index: test_genexps.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_genexps.py,v retrieving revision 1.7.4.2 retrieving revision 1.7.4.3 diff -u -d -r1.7.4.2 -r1.7.4.3 --- test_genexps.py 13 Oct 2005 16:11:15 -0000 1.7.4.2 +++ test_genexps.py 16 Oct 2005 05:24:00 -0000 1.7.4.3 @@ -133,8 +133,6 @@ SyntaxError: augmented assignment to generator expression not possible (, line 1) - - ########### Tests borrowed from or inspired by test_generators.py ############ Make a generator that acts like range() Index: test_getargs2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_getargs2.py,v retrieving revision 1.4.4.2 retrieving revision 1.4.4.3 diff -u -d -r1.4.4.2 -r1.4.4.3 --- test_getargs2.py 7 Jan 2005 06:59:09 -0000 1.4.4.2 +++ test_getargs2.py 16 Oct 2005 05:24:00 -0000 1.4.4.3 @@ -187,16 +187,10 @@ def test_L(self): from _testcapi import getargs_L # L returns 'long long', and does range checking (LLONG_MIN ... 
LLONG_MAX) - - # XXX There's a bug in getargs.c, format code "L": - # If you pass something else than a Python long, you - # get "Bad argument to internal function". - - # So these three tests are commented out: - -## self.failUnlessEqual(3, getargs_L(3.14)) -## self.failUnlessEqual(99, getargs_L(Long())) -## self.failUnlessEqual(99, getargs_L(Int())) + self.failUnlessRaises(TypeError, getargs_L, "Hello") + self.failUnlessEqual(3, getargs_L(3.14)) + self.failUnlessEqual(99, getargs_L(Long())) + self.failUnlessEqual(99, getargs_L(Int())) self.assertRaises(OverflowError, getargs_L, LLONG_MIN-1) self.failUnlessEqual(LLONG_MIN, getargs_L(LLONG_MIN)) Index: test_glob.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_glob.py,v retrieving revision 1.3.14.2 retrieving revision 1.3.14.3 diff -u -d -r1.3.14.2 -r1.3.14.3 --- test_glob.py 7 Jan 2005 06:59:09 -0000 1.3.14.2 +++ test_glob.py 16 Oct 2005 05:24:00 -0000 1.3.14.3 @@ -2,35 +2,7 @@ from test.test_support import run_unittest, TESTFN import glob import os - -def mkdirs(fname): - if os.path.exists(fname) or fname == '': - return - base, file = os.path.split(fname) - mkdirs(base) - os.mkdir(fname) - -def touchfile(fname): - base, file = os.path.split(fname) - mkdirs(base) - f = open(fname, 'w') - f.close() - -def deltree(fname): - for f in os.listdir(fname): - fullname = os.path.join(fname, f) - if os.path.isdir(fullname): - deltree(fullname) - else: - try: - os.unlink(fullname) - except: - pass - try: - os.rmdir(fname) - except: - pass - +import shutil class GlobTests(unittest.TestCase): @@ -38,7 +10,12 @@ return os.path.normpath(os.path.join(self.tempdir, *parts)) def mktemp(self, *parts): - touchfile(self.norm(*parts)) + filename = self.norm(*parts) + base, file = os.path.split(filename) + if not os.path.exists(base): + os.makedirs(base) + f = open(filename, 'w') + f.close() def setUp(self): self.tempdir = TESTFN+"_dir" @@ -53,7 +30,7 @@ 
os.symlink(self.norm('broken'), self.norm('sym2')) def tearDown(self): - deltree(self.tempdir) + shutil.rmtree(self.tempdir) def glob(self, *parts): if len(parts) == 1: @@ -61,7 +38,9 @@ else: pattern = os.path.join(*parts) p = os.path.join(self.tempdir, pattern) - return glob.glob(p) + res = glob.glob(p) + self.assertEqual(list(glob.iglob(p)), res) + return res def assertSequencesEqual_noorder(self, l1, l2): self.assertEqual(set(l1), set(l2)) Index: test_grammar.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_grammar.py,v retrieving revision 1.40.8.4 retrieving revision 1.40.8.5 diff -u -d -r1.40.8.4 -r1.40.8.5 --- test_grammar.py 2 Jun 2005 05:14:34 -0000 1.40.8.4 +++ test_grammar.py 16 Oct 2005 05:24:00 -0000 1.40.8.5 @@ -692,8 +692,9 @@ ### testlist: test (',' test)* [','] # These have been exercised enough above -print 'classdef' # 'class' NAME ['(' testlist ')'] ':' suite +print 'classdef' # 'class' NAME ['(' [testlist] ')'] ':' suite class B: pass +class B2(): pass class C1(B): pass class C2(B): pass class D(C1, C2, B): pass Index: test_gzip.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_gzip.py,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -d -r1.9.2.2 -r1.9.2.3 --- test_gzip.py 7 Jan 2005 06:59:09 -0000 1.9.2.2 +++ test_gzip.py 16 Oct 2005 05:24:00 -0000 1.9.2.3 @@ -1,8 +1,12 @@ -from test.test_support import verify, TESTFN +#! /usr/bin/env python +"""Test script for the gzip module. +""" + +import unittest +from test import test_support import sys, os import gzip -filename = TESTFN data1 = """ int length=DEFAULTALLOC, err = Z_OK; PyObject *RetVal; @@ -16,75 +20,130 @@ /* See http://www.winimage.com/zLibDll for Windows */ """ -f = gzip.GzipFile(filename, 'wb') ; f.write(data1 * 50) -# Try flush and fileno. 
-f.flush() -f.fileno() -if hasattr(os, 'fsync'): - os.fsync(f.fileno()) -f.close() +class TestGzip(unittest.TestCase): + filename = test_support.TESTFN -# Try reading. -f = gzip.GzipFile(filename, 'r') ; d = f.read() ; f.close() -verify(d == data1*50) + def setUp (self): + pass -# Append to the previous file -f = gzip.GzipFile(filename, 'ab') ; f.write(data2 * 15) ; f.close() + def tearDown (self): + try: + os.unlink(self.filename) + except os.error: + pass -f = gzip.GzipFile(filename, 'rb') ; d = f.read() ; f.close() -verify(d == (data1*50) + (data2*15)) -# Try .readline() with varying line lengths + def test_write (self): + f = gzip.GzipFile(self.filename, 'wb') ; f.write(data1 * 50) -f = gzip.GzipFile(filename, 'rb') -line_length = 0 -while 1: - L = f.readline(line_length) - if L == "" and line_length != 0: break - verify(len(L) <= line_length) - line_length = (line_length + 1) % 50 -f.close() + # Try flush and fileno. + f.flush() + f.fileno() + if hasattr(os, 'fsync'): + os.fsync(f.fileno()) + f.close() -# Try .readlines() + def test_read(self): + self.test_write() + # Try reading. + f = gzip.GzipFile(self.filename, 'r') ; d = f.read() ; f.close() + self.assertEqual(d, data1*50) -f = gzip.GzipFile(filename, 'rb') -L = f.readlines() -f.close() + def test_append(self): + self.test_write() + # Append to the previous file + f = gzip.GzipFile(self.filename, 'ab') ; f.write(data2 * 15) ; f.close() -f = gzip.GzipFile(filename, 'rb') -while 1: - L = f.readlines(150) - if L == []: break -f.close() + f = gzip.GzipFile(self.filename, 'rb') ; d = f.read() ; f.close() + self.assertEqual(d, (data1*50) + (data2*15)) -# Try seek, read test + def test_many_append(self): + # Bug #1074261 was triggered when reading a file that contained + # many, many members. Create such a file and verify that reading it + # works. 
+ f = gzip.open(self.filename, 'wb', 9) + f.write('a') + f.close() + for i in range(0,200): + f = gzip.open(self.filename, "ab", 9) # append + f.write('a') + f.close() -f = gzip.GzipFile(filename) -while 1: - oldpos = f.tell() - line1 = f.readline() - if not line1: break - newpos = f.tell() - f.seek(oldpos) # negative seek - if len(line1)>10: - amount = 10 - else: - amount = len(line1) - line2 = f.read(amount) - verify(line1[:amount] == line2) - f.seek(newpos) # positive seek -f.close() + # Try reading the file + zgfile = gzip.open(self.filename, "rb") + contents = "" + while 1: + ztxt = zgfile.read(8192) + contents += ztxt + if not ztxt: break + zgfile.close() + self.assertEquals(contents, 'a'*201) -# Try seek, write test -f = gzip.GzipFile(filename, 'w') -for pos in range(0, 256, 16): - f.seek(pos) - f.write('GZ\n') -f.close() -f = gzip.GzipFile(filename, 'r') -verify(f.myfileobj.mode == 'rb') -f.close() + def test_readline(self): + self.test_write() + # Try .readline() with varying line lengths -os.unlink(filename) + f = gzip.GzipFile(self.filename, 'rb') + line_length = 0 + while 1: + L = f.readline(line_length) + if L == "" and line_length != 0: break + self.assert_(len(L) <= line_length) + line_length = (line_length + 1) % 50 + f.close() + + def test_readlines(self): + self.test_write() + # Try .readlines() + + f = gzip.GzipFile(self.filename, 'rb') + L = f.readlines() + f.close() + + f = gzip.GzipFile(self.filename, 'rb') + while 1: + L = f.readlines(150) + if L == []: break + f.close() + + def test_seek_read(self): + self.test_write() + # Try seek, read test + + f = gzip.GzipFile(self.filename) + while 1: + oldpos = f.tell() + line1 = f.readline() + if not line1: break + newpos = f.tell() + f.seek(oldpos) # negative seek + if len(line1)>10: + amount = 10 + else: + amount = len(line1) + line2 = f.read(amount) + self.assertEqual(line1[:amount], line2) + f.seek(newpos) # positive seek + f.close() + + def test_seek_write(self): + # Try seek, write test + f = 
gzip.GzipFile(self.filename, 'w') + for pos in range(0, 256, 16): + f.seek(pos) + f.write('GZ\n') + f.close() + + def test_mode(self): + self.test_write() + f = gzip.GzipFile(self.filename, 'r') + self.assertEqual(f.myfileobj.mode, 'rb') + f.close() + +def test_main(verbose=None): + test_support.run_unittest(TestGzip) + +if __name__ == "__main__": + test_main(verbose=True) Index: test_hmac.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_hmac.py,v retrieving revision 1.4.2.2 retrieving revision 1.4.2.3 diff -u -d -r1.4.2.2 -r1.4.2.3 --- test_hmac.py 7 Jan 2005 06:59:09 -0000 1.4.2.2 +++ test_hmac.py 16 Oct 2005 05:24:00 -0000 1.4.2.3 @@ -105,9 +105,10 @@ def test_default_is_md5(self): # Testing if HMAC defaults to MD5 algorithm. - import md5 + # NOTE: this whitebox test depends on the hmac class internals + import hashlib h = hmac.HMAC("key") - self.failUnless(h.digestmod == md5) + self.failUnless(h.digest_cons == hashlib.md5) def test_exercise_all_methods(self): # Exercising all methods once. @@ -127,8 +128,8 @@ # Testing if attributes are of same type. 
h1 = hmac.HMAC("key") h2 = h1.copy() - self.failUnless(h1.digestmod == h2.digestmod, - "Modules don't match.") + self.failUnless(h1.digest_cons == h2.digest_cons, + "digest constructors don't match.") self.failUnless(type(h1.inner) == type(h2.inner), "Types of inner don't match.") self.failUnless(type(h1.outer) == type(h2.outer), Index: test_inspect.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_inspect.py,v retrieving revision 1.7.12.2 retrieving revision 1.7.12.3 diff -u -d -r1.7.12.2 -r1.7.12.3 --- test_inspect.py 7 Jan 2005 06:59:09 -0000 1.7.12.2 +++ test_inspect.py 16 Oct 2005 05:24:00 -0000 1.7.12.3 @@ -31,11 +31,11 @@ predicates = set([inspect.isbuiltin, inspect.isclass, inspect.iscode, inspect.isframe, inspect.isfunction, inspect.ismethod, inspect.ismodule, inspect.istraceback]) - + def istest(self, predicate, exp): obj = eval(exp) self.failUnless(predicate(obj), '%s(%s)' % (predicate.__name__, exp)) - + for other in self.predicates - set([predicate]): self.failIf(other(obj), 'not %s(%s)' % (other.__name__, exp)) @@ -44,7 +44,7 @@ # Doc/lib/libinspect.tex claims there are 11 such functions count = len(filter(lambda x:x.startswith('is'), dir(inspect))) self.assertEqual(count, 11, "There are %d (not 11) is* functions" % count) - + def test_excluding_predicates(self): self.istest(inspect.isbuiltin, 'sys.exit') self.istest(inspect.isbuiltin, '[].append') @@ -66,7 +66,7 @@ class TestInterpreterStack(IsTestBase): def __init__(self, *args, **kwargs): unittest.TestCase.__init__(self, *args, **kwargs) - + git.abuse(7, 8, 9) def test_abuse_done(self): @@ -76,7 +76,7 @@ def test_stack(self): self.assert_(len(mod.st) >= 5) self.assertEqual(mod.st[0][1:], - (modfile, 16, 'eggs', [' st = inspect.stack()\n'], 0)) + (modfile, 16, 'eggs', [' st = inspect.stack()\n'], 0)) self.assertEqual(mod.st[1][1:], (modfile, 9, 'spam', [' eggs(b + d, c + f)\n'], 0)) self.assertEqual(mod.st[2][1:], @@ -113,7 
+113,7 @@ class GetSourceBase(unittest.TestCase): # Subclasses must override. fodderFile = None - + def __init__(self, *args, **kwargs): unittest.TestCase.__init__(self, *args, **kwargs) @@ -126,10 +126,10 @@ def assertSourceEqual(self, obj, top, bottom): self.assertEqual(inspect.getsource(obj), self.sourcerange(top, bottom)) - + class TestRetrievingSourceCode(GetSourceBase): fodderFile = mod - + def test_getclasses(self): classes = inspect.getmembers(mod, inspect.isclass) self.assertEqual(classes, @@ -147,7 +147,7 @@ ] ] ]) - + def test_getfunctions(self): functions = inspect.getmembers(mod, inspect.isfunction) self.assertEqual(functions, [('eggs', mod.eggs), @@ -172,8 +172,8 @@ self.assertSourceEqual(mod.StupidGit, 21, 46) def test_getsourcefile(self): - self.assertEqual(inspect.getsourcefile(mod.spam), modfile) - self.assertEqual(inspect.getsourcefile(git.abuse), modfile) + self.assertEqual(inspect.getsourcefile(mod.spam), modfile) + self.assertEqual(inspect.getsourcefile(git.abuse), modfile) def test_getfile(self): self.assertEqual(inspect.getfile(mod.StupidGit), mod.__file__) @@ -192,48 +192,69 @@ def test_oneline_lambda(self): # Test inspect.getsource with a one-line lambda function. self.assertSourceEqual(mod2.oll, 25, 25) - + def test_threeline_lambda(self): # Test inspect.getsource with a three-line lambda function, # where the second and third lines are _not_ indented. - self.assertSourceEqual(mod2.tll, 28, 30) - + self.assertSourceEqual(mod2.tll, 28, 30) + def test_twoline_indented_lambda(self): # Test inspect.getsource with a two-line lambda function, # where the second line _is_ indented. self.assertSourceEqual(mod2.tlli, 33, 34) - + def test_onelinefunc(self): # Test inspect.getsource with a regular one-line function. 
self.assertSourceEqual(mod2.onelinefunc, 37, 37) - + def test_manyargs(self): # Test inspect.getsource with a regular function where # the arguments are on two lines and _not_ indented and # the body on the second line with the last arguments. self.assertSourceEqual(mod2.manyargs, 40, 41) - + def test_twolinefunc(self): # Test inspect.getsource with a regular function where # the body is on two lines, following the argument list and # continued on the next line by a \\. self.assertSourceEqual(mod2.twolinefunc, 44, 45) - + def test_lambda_in_list(self): # Test inspect.getsource with a one-line lambda function # defined in a list, indented. self.assertSourceEqual(mod2.a[1], 49, 49) - + def test_anonymous(self): # Test inspect.getsource with a lambda function defined # as argument to another function. self.assertSourceEqual(mod2.anonymous, 55, 55) +class TestBuggyCases(GetSourceBase): + fodderFile = mod2 + + def test_with_comment(self): + self.assertSourceEqual(mod2.with_comment, 58, 59) + + def test_multiline_sig(self): + self.assertSourceEqual(mod2.multiline_sig[0], 63, 64) + + def test_nested_class(self): + self.assertSourceEqual(mod2.func69().func71, 71, 72) + + def test_one_liner_followed_by_non_name(self): + self.assertSourceEqual(mod2.func77, 77, 77) + + def test_one_liner_dedent_non_name(self): + self.assertSourceEqual(mod2.cls82.func83, 83, 83) + + def test_with_comment_instead_of_docstring(self): + self.assertSourceEqual(mod2.func88, 88, 90) + # Helper for testing classify_class_attrs. def attrs_wo_objs(cls): return [t[:3] for t in inspect.classify_class_attrs(cls)] -class TestClassesAndFunctions(unittest.TestCase): +class TestClassesAndFunctions(unittest.TestCase): def test_classic_mro(self): # Test classic-class method resolution order. 
class A: pass @@ -284,7 +305,7 @@ def test_getargspec_sublistofone(self): def sublistOfOne((foo)): return 1 - + self.assertArgSpecEquals(sublistOfOne, [['foo']]) def test_classify_oldstyle(self): @@ -414,8 +435,8 @@ def test_main(): run_unittest(TestDecorators, TestRetrievingSourceCode, TestOneliners, + TestBuggyCases, TestInterpreterStack, TestClassesAndFunctions, TestPredicates) if __name__ == "__main__": test_main() - Index: test_ioctl.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_ioctl.py,v retrieving revision 1.2.6.1 retrieving revision 1.2.6.2 diff -u -d -r1.2.6.1 -r1.2.6.2 --- test_ioctl.py 28 Apr 2003 17:28:39 -0000 1.2.6.1 +++ test_ioctl.py 16 Oct 2005 05:24:00 -0000 1.2.6.2 @@ -1,5 +1,5 @@ import unittest -from test_support import TestSkipped, run_unittest +from test.test_support import TestSkipped, run_unittest import os, struct try: import fcntl, termios @@ -16,19 +16,23 @@ class IoctlTests(unittest.TestCase): def test_ioctl(self): - pgrp = os.getpgrp() + # If this process has been put into the background, TIOCGPGRP returns + # the session ID instead of the process group id. 
+ ids = (os.getpgrp(), os.getsid(0)) tty = open("/dev/tty", "r") r = fcntl.ioctl(tty, termios.TIOCGPGRP, " ") - self.assertEquals(pgrp, struct.unpack("i", r)[0]) + rpgrp = struct.unpack("i", r)[0] + self.assert_(rpgrp in ids, "%s not in %s" % (rpgrp, ids)) def test_ioctl_mutate(self): import array buf = array.array('i', [0]) - pgrp = os.getpgrp() + ids = (os.getpgrp(), os.getsid(0)) tty = open("/dev/tty", "r") r = fcntl.ioctl(tty, termios.TIOCGPGRP, buf, 1) + rpgrp = buf[0] self.assertEquals(r, 0) - self.assertEquals(pgrp, buf[0]) + self.assert_(rpgrp in ids, "%s not in %s" % (rpgrp, ids)) def test_main(): run_unittest(IoctlTests) Index: test_isinstance.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_isinstance.py,v retrieving revision 1.3.2.2 retrieving revision 1.3.2.3 diff -u -d -r1.3.2.2 -r1.3.2.3 --- test_isinstance.py 7 Jan 2005 06:59:09 -0000 1.3.2.2 +++ test_isinstance.py 16 Oct 2005 05:24:01 -0000 1.3.2.3 @@ -243,7 +243,8 @@ self.assertEqual(True, issubclass(NewSuper, (NewChild, (NewSuper,)))) self.assertEqual(True, issubclass(int, (long, (float, int)))) - self.assertEqual(True, issubclass(str, (unicode, (Child, NewChild, basestring)))) + if test_support.have_unicode: + self.assertEqual(True, issubclass(str, (unicode, (Child, NewChild, basestring)))) def test_subclass_recursion_limit(self): # make sure that issubclass raises RuntimeError before the C stack is Index: test_iterlen.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_iterlen.py,v retrieving revision 1.2.6.1 retrieving revision 1.2.6.2 diff -u -d -r1.2.6.1 -r1.2.6.2 --- test_iterlen.py 7 Jan 2005 06:59:09 -0000 1.2.6.1 +++ test_iterlen.py 16 Oct 2005 05:24:01 -0000 1.2.6.2 @@ -43,12 +43,22 @@ import unittest from test import test_support -from itertools import repeat, count +from itertools import repeat from collections import deque from 
UserList import UserList +from __builtin__ import len as _len n = 10 +def len(obj): + try: + return _len(obj) + except TypeError: + try: + return obj._length_cue() + except AttributeError: + raise TypeError + class TestInvariantWithoutMutations(unittest.TestCase): def test_invariant(self): Index: test_itertools.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_itertools.py,v retrieving revision 1.5.6.2 retrieving revision 1.5.6.3 diff -u -d -r1.5.6.2 -r1.5.6.3 --- test_itertools.py 7 Jan 2005 06:59:09 -0000 1.5.6.2 +++ test_itertools.py 16 Oct 2005 05:24:01 -0000 1.5.6.3 @@ -265,6 +265,11 @@ self.assertEqual(list(islice(xrange(10), 2, None)), range(2, 10)) self.assertEqual(list(islice(xrange(10), 1, None, 2)), range(1, 10, 2)) + # Test number of items consumed SF #1171417 + it = iter(range(10)) + self.assertEqual(list(islice(it, 3)), range(3)) + self.assertEqual(list(it), range(3, 10)) + # Test invalid arguments self.assertRaises(TypeError, islice, xrange(10)) self.assertRaises(TypeError, islice, xrange(10), 1, 2, 3, 4) @@ -665,6 +670,7 @@ class LengthTransparency(unittest.TestCase): def test_repeat(self): + from test.test_iterlen import len self.assertEqual(len(repeat(None, 50)), 50) self.assertRaises(TypeError, len, repeat(None)) @@ -799,26 +805,26 @@ ... "Returns the nth item" ... return list(islice(iterable, n, n+1)) ->>> def all(seq, pred=bool): -... "Returns True if pred(x) is True for every element in the iterable" +>>> def all(seq, pred=None): +... "Returns True if pred(x) is true for every element in the iterable" ... for elem in ifilterfalse(pred, seq): ... return False ... return True ->>> def any(seq, pred=bool): -... "Returns True if pred(x) is True for at least one element in the iterable" +>>> def any(seq, pred=None): +... "Returns True if pred(x) is true for at least one element in the iterable" ... for elem in ifilter(pred, seq): ... return True ... 
return False ->>> def no(seq, pred=bool): -... "Returns True if pred(x) is False for every element in the iterable" +>>> def no(seq, pred=None): +... "Returns True if pred(x) is false for every element in the iterable" ... for elem in ifilter(pred, seq): ... return False ... return True ->>> def quantify(seq, pred=bool): -... "Count how many times the predicate is True in the sequence" +>>> def quantify(seq, pred=None): +... "Count how many times the predicate is true in the sequence" ... return sum(imap(pred, seq)) >>> def padnone(seq): Index: test_locale.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_locale.py,v retrieving revision 1.3.20.2 retrieving revision 1.3.20.3 diff -u -d -r1.3.20.2 -r1.3.20.3 --- test_locale.py 7 Jan 2005 06:59:09 -0000 1.3.20.2 +++ test_locale.py 16 Oct 2005 05:24:01 -0000 1.3.20.3 @@ -7,16 +7,18 @@ oldlocale = locale.setlocale(locale.LC_NUMERIC) if sys.platform.startswith("win"): - tloc = "en" -elif sys.platform.startswith("freebsd"): - tloc = "en_US.US-ASCII" + tlocs = ("en",) else: - tloc = "en_US" + tlocs = ("en_US.UTF-8", "en_US.US-ASCII", "en_US") -try: - locale.setlocale(locale.LC_NUMERIC, tloc) -except locale.Error: - raise ImportError, "test locale %s not supported" % tloc +for tloc in tlocs: + try: + locale.setlocale(locale.LC_NUMERIC, tloc) + break + except locale.Error: + continue +else: + raise ImportError, "test locale not supported (tried %s)"%(', '.join(tlocs)) def testformat(formatstr, value, grouping = 0, output=None): if verbose: Index: test_long.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_long.py,v retrieving revision 1.16.2.2 retrieving revision 1.16.2.3 diff -u -d -r1.16.2.2 -r1.16.2.3 --- test_long.py 7 Jan 2005 06:59:09 -0000 1.16.2.2 +++ test_long.py 16 Oct 2005 05:24:01 -0000 1.16.2.3 @@ -1,6 +1,16 @@ -from test.test_support import verify, verbose, 
TestFailed, fcmp -from string import join -from random import random, randint +import unittest +from test import test_support + +import random + +# Used for lazy formatting of failure messages +class Frm(object): + def __init__(self, format, *args): + self.format = format + self.args = args + + def __str__(self): + return self.format % self.args # SHIFT should match the value in longintrepr.h for best testing. SHIFT = 15 @@ -26,518 +36,451 @@ special = special + map(lambda x: ~x, special) + \ map(lambda x: -x, special) -# ------------------------------------------------------------ utilities - -# Use check instead of assert so the test still does something -# under -O. - -def check(ok, *args): - if not ok: - raise TestFailed, join(map(str, args), " ") - -# Get quasi-random long consisting of ndigits digits (in base BASE). -# quasi == the most-significant digit will not be 0, and the number -# is constructed to contain long strings of 0 and 1 bits. These are -# more likely than random bits to provoke digit-boundary errors. -# The sign of the number is also random. - -def getran(ndigits): - verify(ndigits > 0) - nbits_hi = ndigits * SHIFT - nbits_lo = nbits_hi - SHIFT + 1 - answer = 0L - nbits = 0 - r = int(random() * (SHIFT * 2)) | 1 # force 1 bits to start - while nbits < nbits_lo: - bits = (r >> 1) + 1 - bits = min(bits, nbits_hi - nbits) - verify(1 <= bits <= SHIFT) - nbits = nbits + bits - answer = answer << bits - if r & 1: - answer = answer | ((1 << bits) - 1) - r = int(random() * (SHIFT * 2)) - verify(nbits_lo <= nbits <= nbits_hi) - if random() < 0.5: - answer = -answer - return answer - -# Get random long consisting of ndigits random digits (relative to base -# BASE). The sign bit is also random. 
-def getran2(ndigits): - answer = 0L - for i in range(ndigits): - answer = (answer << SHIFT) | randint(0, MASK) - if random() < 0.5: - answer = -answer - return answer +class LongTest(unittest.TestCase): -# --------------------------------------------------------------- divmod + # Get quasi-random long consisting of ndigits digits (in base BASE). + # quasi == the most-significant digit will not be 0, and the number + # is constructed to contain long strings of 0 and 1 bits. These are + # more likely than random bits to provoke digit-boundary errors. + # The sign of the number is also random. -def test_division_2(x, y): - q, r = divmod(x, y) - q2, r2 = x//y, x%y - pab, pba = x*y, y*x - check(pab == pba, "multiplication does not commute for", x, y) - check(q == q2, "divmod returns different quotient than / for", x, y) - check(r == r2, "divmod returns different mod than % for", x, y) - check(x == q*y + r, "x != q*y + r after divmod on", x, y) - if y > 0: - check(0 <= r < y, "bad mod from divmod on", x, y) - else: - check(y < r <= 0, "bad mod from divmod on", x, y) + def getran(self, ndigits): + self.assert_(ndigits > 0) + nbits_hi = ndigits * SHIFT + nbits_lo = nbits_hi - SHIFT + 1 + answer = 0L + nbits = 0 + r = int(random.random() * (SHIFT * 2)) | 1 # force 1 bits to start + while nbits < nbits_lo: + bits = (r >> 1) + 1 + bits = min(bits, nbits_hi - nbits) + self.assert_(1 <= bits <= SHIFT) + nbits = nbits + bits + answer = answer << bits + if r & 1: + answer = answer | ((1 << bits) - 1) + r = int(random.random() * (SHIFT * 2)) + self.assert_(nbits_lo <= nbits <= nbits_hi) + if random.random() < 0.5: + answer = -answer + return answer -def test_division(maxdigits=MAXDIGITS): - if verbose: - print "long / * % divmod" - digits = range(1, maxdigits+1) + range(KARATSUBA_CUTOFF, - KARATSUBA_CUTOFF + 14) - digits.append(KARATSUBA_CUTOFF * 3) - for lenx in digits: - x = getran(lenx) - for leny in digits: - y = getran(leny) or 1L - test_division_2(x, y) -# 
------------------------------------------------------------ karatsuba + # Get random long consisting of ndigits random digits (relative to base + # BASE). The sign bit is also random. -def test_karatsuba(): + def getran2(ndigits): + answer = 0L + for i in xrange(ndigits): + answer = (answer << SHIFT) | random.randint(0, MASK) + if random.random() < 0.5: + answer = -answer + return answer - if verbose: - print "Karatsuba" + def check_division(self, x, y): + eq = self.assertEqual + q, r = divmod(x, y) + q2, r2 = x//y, x%y + pab, pba = x*y, y*x + eq(pab, pba, Frm("multiplication does not commute for %r and %r", x, y)) + eq(q, q2, Frm("divmod returns different quotient than / for %r and %r", x, y)) + eq(r, r2, Frm("divmod returns different mod than %% for %r and %r", x, y)) + eq(x, q*y + r, Frm("x != q*y + r after divmod on x=%r, y=%r", x, y)) + if y > 0: + self.assert_(0 <= r < y, Frm("bad mod from divmod on %r and %r", x, y)) + else: + self.assert_(y < r <= 0, Frm("bad mod from divmod on %r and %r", x, y)) - digits = range(1, 5) + range(KARATSUBA_CUTOFF, KARATSUBA_CUTOFF + 10) - digits.extend([KARATSUBA_CUTOFF * 10, KARATSUBA_CUTOFF * 100]) + def test_division(self): + digits = range(1, MAXDIGITS+1) + range(KARATSUBA_CUTOFF, + KARATSUBA_CUTOFF + 14) + digits.append(KARATSUBA_CUTOFF * 3) + for lenx in digits: + x = self.getran(lenx) + for leny in digits: + y = self.getran(leny) or 1L + self.check_division(x, y) - bits = [digit * SHIFT for digit in digits] + def test_karatsuba(self): + digits = range(1, 5) + range(KARATSUBA_CUTOFF, KARATSUBA_CUTOFF + 10) + digits.extend([KARATSUBA_CUTOFF * 10, KARATSUBA_CUTOFF * 100]) - # Test products of long strings of 1 bits -- (2**x-1)*(2**y-1) == - # 2**(x+y) - 2**x - 2**y + 1, so the proper result is easy to check. 
- for abits in bits: - a = (1L << abits) - 1 - for bbits in bits: - if bbits < abits: - continue - b = (1L << bbits) - 1 - x = a * b - y = ((1L << (abits + bbits)) - - (1L << abits) - - (1L << bbits) + - 1) - check(x == y, "bad result for", a, "*", b, x, y) -# -------------------------------------------------------------- ~ & | ^ + bits = [digit * SHIFT for digit in digits] -def test_bitop_identities_1(x): - check(x & 0 == 0, "x & 0 != 0 for", x) - check(x | 0 == x, "x | 0 != x for", x) - check(x ^ 0 == x, "x ^ 0 != x for", x) - check(x & -1 == x, "x & -1 != x for", x) - check(x | -1 == -1, "x | -1 != -1 for", x) - check(x ^ -1 == ~x, "x ^ -1 != ~x for", x) - check(x == ~~x, "x != ~~x for", x) - check(x & x == x, "x & x != x for", x) - check(x | x == x, "x | x != x for", x) - check(x ^ x == 0, "x ^ x != 0 for", x) - check(x & ~x == 0, "x & ~x != 0 for", x) - check(x | ~x == -1, "x | ~x != -1 for", x) - check(x ^ ~x == -1, "x ^ ~x != -1 for", x) - check(-x == 1 + ~x == ~(x-1), "not -x == 1 + ~x == ~(x-1) for", x) - for n in range(2*SHIFT): - p2 = 2L ** n - check(x << n >> n == x, "x << n >> n != x for", x, n) - check(x // p2 == x >> n, "x // p2 != x >> n for x n p2", x, n, p2) - check(x * p2 == x << n, "x * p2 != x << n for x n p2", x, n, p2) - check(x & -p2 == x >> n << n == x & ~(p2 - 1), - "not x & -p2 == x >> n << n == x & ~(p2 - 1) for x n p2", - x, n, p2) + # Test products of long strings of 1 bits -- (2**x-1)*(2**y-1) == + # 2**(x+y) - 2**x - 2**y + 1, so the proper result is easy to check. 
+ for abits in bits: + a = (1L << abits) - 1 + for bbits in bits: + if bbits < abits: + continue + b = (1L << bbits) - 1 + x = a * b + y = ((1L << (abits + bbits)) - + (1L << abits) - + (1L << bbits) + + 1) + self.assertEqual(x, y, + Frm("bad result for a*b: a=%r, b=%r, x=%r, y=%r", a, b, x, y)) -def test_bitop_identities_2(x, y): - check(x & y == y & x, "x & y != y & x for", x, y) - check(x | y == y | x, "x | y != y | x for", x, y) - check(x ^ y == y ^ x, "x ^ y != y ^ x for", x, y) - check(x ^ y ^ x == y, "x ^ y ^ x != y for", x, y) - check(x & y == ~(~x | ~y), "x & y != ~(~x | ~y) for", x, y) - check(x | y == ~(~x & ~y), "x | y != ~(~x & ~y) for", x, y) - check(x ^ y == (x | y) & ~(x & y), - "x ^ y != (x | y) & ~(x & y) for", x, y) - check(x ^ y == (x & ~y) | (~x & y), - "x ^ y == (x & ~y) | (~x & y) for", x, y) - check(x ^ y == (x | y) & (~x | ~y), - "x ^ y == (x | y) & (~x | ~y) for", x, y) + def check_bitop_identities_1(self, x): + eq = self.assertEqual + eq(x & 0, 0, Frm("x & 0 != 0 for x=%r", x)) + eq(x | 0, x, Frm("x | 0 != x for x=%r", x)) + eq(x ^ 0, x, Frm("x ^ 0 != x for x=%r", x)) + eq(x & -1, x, Frm("x & -1 != x for x=%r", x)) + eq(x | -1, -1, Frm("x | -1 != -1 for x=%r", x)) + eq(x ^ -1, ~x, Frm("x ^ -1 != ~x for x=%r", x)) + eq(x, ~~x, Frm("x != ~~x for x=%r", x)) + eq(x & x, x, Frm("x & x != x for x=%r", x)) + eq(x | x, x, Frm("x | x != x for x=%r", x)) + eq(x ^ x, 0, Frm("x ^ x != 0 for x=%r", x)) + eq(x & ~x, 0, Frm("x & ~x != 0 for x=%r", x)) + eq(x | ~x, -1, Frm("x | ~x != -1 for x=%r", x)) + eq(x ^ ~x, -1, Frm("x ^ ~x != -1 for x=%r", x)) + eq(-x, 1 + ~x, Frm("not -x == 1 + ~x for x=%r", x)) + eq(-x, ~(x-1), Frm("not -x == ~(x-1) forx =%r", x)) + for n in xrange(2*SHIFT): + p2 = 2L ** n + eq(x << n >> n, x, + Frm("x << n >> n != x for x=%r, n=%r", (x, n))) + eq(x // p2, x >> n, + Frm("x // p2 != x >> n for x=%r n=%r p2=%r", (x, n, p2))) + eq(x * p2, x << n, + Frm("x * p2 != x << n for x=%r n=%r p2=%r", (x, n, p2))) + eq(x & -p2, x >> n << n, 
+ Frm("not x & -p2 == x >> n << n for x=%r n=%r p2=%r", (x, n, p2))) + eq(x & -p2, x & ~(p2 - 1), + Frm("not x & -p2 == x & ~(p2 - 1) for x=%r n=%r p2=%r", (x, n, p2))) -def test_bitop_identities_3(x, y, z): - check((x & y) & z == x & (y & z), - "(x & y) & z != x & (y & z) for", x, y, z) - check((x | y) | z == x | (y | z), - "(x | y) | z != x | (y | z) for", x, y, z) - check((x ^ y) ^ z == x ^ (y ^ z), - "(x ^ y) ^ z != x ^ (y ^ z) for", x, y, z) - check(x & (y | z) == (x & y) | (x & z), - "x & (y | z) != (x & y) | (x & z) for", x, y, z) - check(x | (y & z) == (x | y) & (x | z), - "x | (y & z) != (x | y) & (x | z) for", x, y, z) + def check_bitop_identities_2(self, x, y): + eq = self.assertEqual + eq(x & y, y & x, Frm("x & y != y & x for x=%r, y=%r", (x, y))) + eq(x | y, y | x, Frm("x | y != y | x for x=%r, y=%r", (x, y))) + eq(x ^ y, y ^ x, Frm("x ^ y != y ^ x for x=%r, y=%r", (x, y))) + eq(x ^ y ^ x, y, Frm("x ^ y ^ x != y for x=%r, y=%r", (x, y))) + eq(x & y, ~(~x | ~y), Frm("x & y != ~(~x | ~y) for x=%r, y=%r", (x, y))) + eq(x | y, ~(~x & ~y), Frm("x | y != ~(~x & ~y) for x=%r, y=%r", (x, y))) + eq(x ^ y, (x | y) & ~(x & y), + Frm("x ^ y != (x | y) & ~(x & y) for x=%r, y=%r", (x, y))) + eq(x ^ y, (x & ~y) | (~x & y), + Frm("x ^ y == (x & ~y) | (~x & y) for x=%r, y=%r", (x, y))) + eq(x ^ y, (x | y) & (~x | ~y), + Frm("x ^ y == (x | y) & (~x | ~y) for x=%r, y=%r", (x, y))) -def test_bitop_identities(maxdigits=MAXDIGITS): - if verbose: - print "long bit-operation identities" - for x in special: - test_bitop_identities_1(x) - digits = range(1, maxdigits+1) - for lenx in digits: - x = getran(lenx) - test_bitop_identities_1(x) - for leny in digits: - y = getran(leny) - test_bitop_identities_2(x, y) - test_bitop_identities_3(x, y, getran((lenx + leny)//2)) + def check_bitop_identities_3(self, x, y, z): + eq = self.assertEqual + eq((x & y) & z, x & (y & z), + Frm("(x & y) & z != x & (y & z) for x=%r, y=%r, z=%r", (x, y, z))) + eq((x | y) | z, x | (y | z), + Frm("(x | 
y) | z != x | (y | z) for x=%r, y=%r, z=%r", (x, y, z))) + eq((x ^ y) ^ z, x ^ (y ^ z), + Frm("(x ^ y) ^ z != x ^ (y ^ z) for x=%r, y=%r, z=%r", (x, y, z))) + eq(x & (y | z), (x & y) | (x & z), + Frm("x & (y | z) != (x & y) | (x & z) for x=%r, y=%r, z=%r", (x, y, z))) + eq(x | (y & z), (x | y) & (x | z), + Frm("x | (y & z) != (x | y) & (x | z) for x=%r, y=%r, z=%r", (x, y, z))) -# ------------------------------------------------- hex oct repr str atol + def test_bitop_identities(self): + for x in special: + self.check_bitop_identities_1(x) + digits = xrange(1, MAXDIGITS+1) + for lenx in digits: + x = self.getran(lenx) + self.check_bitop_identities_1(x) + for leny in digits: + y = self.getran(leny) + self.check_bitop_identities_2(x, y) + self.check_bitop_identities_3(x, y, self.getran((lenx + leny)//2)) -def slow_format(x, base): - if (x, base) == (0, 8): - # this is an oddball! - return "0L" - digits = [] - sign = 0 - if x < 0: - sign, x = 1, -x - while x: - x, r = divmod(x, base) - digits.append(int(r)) - digits.reverse() - digits = digits or [0] - return '-'[:sign] + \ - {8: '0', 10: '', 16: '0x'}[base] + \ - join(map(lambda i: "0123456789ABCDEF"[i], digits), '') + \ - "L" + def slow_format(self, x, base): + if (x, base) == (0, 8): + # this is an oddball! 
+ return "0L" + digits = [] + sign = 0 + if x < 0: + sign, x = 1, -x + while x: + x, r = divmod(x, base) + digits.append(int(r)) + digits.reverse() + digits = digits or [0] + return '-'[:sign] + \ + {8: '0', 10: '', 16: '0x'}[base] + \ + "".join(map(lambda i: "0123456789abcdef"[i], digits)) + "L" -def test_format_1(x): - from string import atol - for base, mapper in (8, oct), (10, repr), (16, hex): - got = mapper(x) - expected = slow_format(x, base) - check(got == expected, mapper.__name__, "returned", - got, "but expected", expected, "for", x) - check(atol(got, 0) == x, 'atol("%s", 0) !=' % got, x) - # str() has to be checked a little differently since there's no - # trailing "L" - got = str(x) - expected = slow_format(x, 10)[:-1] - check(got == expected, mapper.__name__, "returned", - got, "but expected", expected, "for", x) + def check_format_1(self, x): + for base, mapper in (8, oct), (10, repr), (16, hex): + got = mapper(x) + expected = self.slow_format(x, base) + msg = Frm("%s returned %r but expected %r for %r", + mapper.__name__, got, expected, x) + self.assertEqual(got, expected, msg) + self.assertEqual(long(got, 0), x, Frm('long("%s", 0) != %r', got, x)) + # str() has to be checked a little differently since there's no + # trailing "L" + got = str(x) + expected = self.slow_format(x, 10)[:-1] + msg = Frm("%s returned %r but expected %r for %r", + mapper.__name__, got, expected, x) + self.assertEqual(got, expected, msg) -def test_format(maxdigits=MAXDIGITS): - if verbose: - print "long str/hex/oct/atol" - for x in special: - test_format_1(x) - for i in range(10): - for lenx in range(1, maxdigits+1): - x = getran(lenx) - test_format_1(x) + def test_format(self): + for x in special: + self.check_format_1(x) + for i in xrange(10): + for lenx in xrange(1, MAXDIGITS+1): + x = self.getran(lenx) + self.check_format_1(x) -# ----------------------------------------------------------------- misc + def test_misc(self): + import sys -def test_misc(maxdigits=MAXDIGITS): 
- if verbose: - print "long miscellaneous operations" - import sys + # check the extremes in int<->long conversion + hugepos = sys.maxint + hugeneg = -hugepos - 1 + hugepos_aslong = long(hugepos) + hugeneg_aslong = long(hugeneg) + self.assertEqual(hugepos, hugepos_aslong, "long(sys.maxint) != sys.maxint") + self.assertEqual(hugeneg, hugeneg_aslong, + "long(-sys.maxint-1) != -sys.maxint-1") - # check the extremes in int<->long conversion - hugepos = sys.maxint - hugeneg = -hugepos - 1 - hugepos_aslong = long(hugepos) - hugeneg_aslong = long(hugeneg) - check(hugepos == hugepos_aslong, "long(sys.maxint) != sys.maxint") - check(hugeneg == hugeneg_aslong, - "long(-sys.maxint-1) != -sys.maxint-1") + # long -> int should not fail for hugepos_aslong or hugeneg_aslong + try: + self.assertEqual(int(hugepos_aslong), hugepos, + "converting sys.maxint to long and back to int fails") + except OverflowError: + self.fail("int(long(sys.maxint)) overflowed!") + try: + self.assertEqual(int(hugeneg_aslong), hugeneg, + "converting -sys.maxint-1 to long and back to int fails") + except OverflowError: + self.fail("int(long(-sys.maxint-1)) overflowed!") - # long -> int should not fail for hugepos_aslong or hugeneg_aslong - try: - check(int(hugepos_aslong) == hugepos, - "converting sys.maxint to long and back to int fails") - except OverflowError: - raise TestFailed, "int(long(sys.maxint)) overflowed!" - try: - check(int(hugeneg_aslong) == hugeneg, - "converting -sys.maxint-1 to long and back to int fails") - except OverflowError: - raise TestFailed, "int(long(-sys.maxint-1)) overflowed!" 
+ # but long -> int should overflow for hugepos+1 and hugeneg-1 + x = hugepos_aslong + 1 + try: + y = int(x) + except OverflowError: + self.fail("int(long(sys.maxint) + 1) mustn't overflow") + self.assert_(isinstance(y, long), + "int(long(sys.maxint) + 1) should have returned long") - # but long -> int should overflow for hugepos+1 and hugeneg-1 - x = hugepos_aslong + 1 - try: - y = int(x) - except OverflowError: - raise TestFailed, "int(long(sys.maxint) + 1) mustn't overflow" - if not isinstance(y, long): - raise TestFailed("int(long(sys.maxint) + 1) should have returned long") + x = hugeneg_aslong - 1 + try: + y = int(x) + except OverflowError: + self.fail("int(long(-sys.maxint-1) - 1) mustn't overflow") + self.assert_(isinstance(y, long), + "int(long(-sys.maxint-1) - 1) should have returned long") - x = hugeneg_aslong - 1 - try: + class long2(long): + pass + x = long2(1L<<100) y = int(x) - except OverflowError: - raise TestFailed, "int(long(-sys.maxint-1) - 1) mustn't overflow" - if not isinstance(y, long): - raise TestFailed("int(long(-sys.maxint-1) - 1) should have returned long") + self.assert_(type(y) is long, + "overflowing int conversion must return long not long subtype") - class long2(long): - pass - x = long2(1L<<100) - y = int(x) - if type(y) is not long: - raise TestFailed("overflowing int conversion must return long not long subtype") # ----------------------------------- tests of auto int->long conversion -def test_auto_overflow(): - import math, sys - - if verbose: - print "auto-convert int->long on overflow" + def test_auto_overflow(self): + import math, sys - special = [0, 1, 2, 3, sys.maxint-1, sys.maxint, sys.maxint+1] - sqrt = int(math.sqrt(sys.maxint)) - special.extend([sqrt-1, sqrt, sqrt+1]) - special.extend([-i for i in special]) + special = [0, 1, 2, 3, sys.maxint-1, sys.maxint, sys.maxint+1] + sqrt = int(math.sqrt(sys.maxint)) + special.extend([sqrt-1, sqrt, sqrt+1]) + special.extend([-i for i in special]) - def checkit(*args): - # Heavy 
use of nested scopes here! - verify(got == expected, "for %r expected %r got %r" % - (args, expected, got)) + def checkit(*args): + # Heavy use of nested scopes here! + self.assertEqual(got, expected, + Frm("for %r expected %r got %r", args, expected, got)) - for x in special: - longx = long(x) + for x in special: + longx = long(x) - expected = -longx - got = -x - checkit('-', x) + expected = -longx + got = -x + checkit('-', x) - for y in special: - longy = long(y) + for y in special: + longy = long(y) - expected = longx + longy - got = x + y - checkit(x, '+', y) + expected = longx + longy + got = x + y + checkit(x, '+', y) - expected = longx - longy - got = x - y - checkit(x, '-', y) + expected = longx - longy + got = x - y + checkit(x, '-', y) - expected = longx * longy - got = x * y - checkit(x, '*', y) + expected = longx * longy + got = x * y + checkit(x, '*', y) - if y: - expected = longx / longy - got = x / y - checkit(x, '/', y) + if y: + expected = longx / longy + got = x / y + checkit(x, '/', y) - expected = longx // longy - got = x // y - checkit(x, '//', y) + expected = longx // longy + got = x // y + checkit(x, '//', y) - expected = divmod(longx, longy) - got = divmod(longx, longy) - checkit(x, 'divmod', y) + expected = divmod(longx, longy) + got = divmod(longx, longy) + checkit(x, 'divmod', y) - if abs(y) < 5 and not (x == 0 and y < 0): - expected = longx ** longy - got = x ** y - checkit(x, '**', y) + if abs(y) < 5 and not (x == 0 and y < 0): + expected = longx ** longy + got = x ** y + checkit(x, '**', y) - for z in special: - if z != 0 : - if y >= 0: - expected = pow(longx, longy, long(z)) - got = pow(x, y, z) - checkit('pow', x, y, '%', z) - else: - try: - pow(longx, longy, long(z)) - except TypeError: - pass + for z in special: + if z != 0 : + if y >= 0: + expected = pow(longx, longy, long(z)) + got = pow(x, y, z) + checkit('pow', x, y, '%', z) else: - raise TestFailed("pow%r should have raised " - "TypeError" % ((longx, longy, long(z)),)) - -# 
---------------------------------------- tests of long->float overflow - -def test_float_overflow(): - import math - - if verbose: - print "long->float overflow" - - for x in -2.0, -1.0, 0.0, 1.0, 2.0: - verify(float(long(x)) == x) - - shuge = '12345' * 120 - huge = 1L << 30000 - mhuge = -huge - namespace = {'huge': huge, 'mhuge': mhuge, 'shuge': shuge, 'math': math} - for test in ["float(huge)", "float(mhuge)", - "complex(huge)", "complex(mhuge)", - "complex(huge, 1)", "complex(mhuge, 1)", - "complex(1, huge)", "complex(1, mhuge)", - "1. + huge", "huge + 1.", "1. + mhuge", "mhuge + 1.", - "1. - huge", "huge - 1.", "1. - mhuge", "mhuge - 1.", - "1. * huge", "huge * 1.", "1. * mhuge", "mhuge * 1.", - "1. // huge", "huge // 1.", "1. // mhuge", "mhuge // 1.", - "1. / huge", "huge / 1.", "1. / mhuge", "mhuge / 1.", - "1. ** huge", "huge ** 1.", "1. ** mhuge", "mhuge ** 1.", - "math.sin(huge)", "math.sin(mhuge)", - "math.sqrt(huge)", "math.sqrt(mhuge)", # should do better - "math.floor(huge)", "math.floor(mhuge)"]: - - try: - eval(test, namespace) - except OverflowError: - pass - else: - raise TestFailed("expected OverflowError from %s" % test) - - # XXX Perhaps float(shuge) can raise OverflowError on some box? - # The comparison should not. 
- if float(shuge) == int(shuge): - raise TestFailed("float(shuge) should not equal int(shuge)") - -# ---------------------------------------------- test huge log and log10 + self.assertRaises(TypeError, pow,longx, longy, long(z)) -def test_logs(): - import math + def test_float_overflow(self): + import math - if verbose: - print "log and log10" + for x in -2.0, -1.0, 0.0, 1.0, 2.0: + self.assertEqual(float(long(x)), x) - LOG10E = math.log10(math.e) + shuge = '12345' * 120 + huge = 1L << 30000 + mhuge = -huge + namespace = {'huge': huge, 'mhuge': mhuge, 'shuge': shuge, 'math': math} + for test in ["float(huge)", "float(mhuge)", + "complex(huge)", "complex(mhuge)", + "complex(huge, 1)", "complex(mhuge, 1)", + "complex(1, huge)", "complex(1, mhuge)", + "1. + huge", "huge + 1.", "1. + mhuge", "mhuge + 1.", + "1. - huge", "huge - 1.", "1. - mhuge", "mhuge - 1.", + "1. * huge", "huge * 1.", "1. * mhuge", "mhuge * 1.", + "1. // huge", "huge // 1.", "1. // mhuge", "mhuge // 1.", + "1. / huge", "huge / 1.", "1. / mhuge", "mhuge / 1.", + "1. ** huge", "huge ** 1.", "1. ** mhuge", "mhuge ** 1.", + "math.sin(huge)", "math.sin(mhuge)", + "math.sqrt(huge)", "math.sqrt(mhuge)", # should do better + "math.floor(huge)", "math.floor(mhuge)"]: - for exp in range(10) + [100, 1000, 10000]: - value = 10 ** exp - log10 = math.log10(value) - verify(fcmp(log10, exp) == 0) + self.assertRaises(OverflowError, eval, test, namespace) - # log10(value) == exp, so log(value) == log10(value)/log10(e) == - # exp/LOG10E - expected = exp / LOG10E - log = math.log(value) - verify(fcmp(log, expected) == 0) + # XXX Perhaps float(shuge) can raise OverflowError on some box? + # The comparison should not. 
+ self.assertNotEqual(float(shuge), int(shuge), + "float(shuge) should not equal int(shuge)") - for bad in -(1L << 10000), -2L, 0L: - try: - math.log(bad) - raise TestFailed("expected ValueError from log(<= 0)") - except ValueError: - pass + def test_logs(self): + import math - try: - math.log10(bad) - raise TestFailed("expected ValueError from log10(<= 0)") - except ValueError: - pass + LOG10E = math.log10(math.e) -# ----------------------------------------------- test mixed comparisons + for exp in range(10) + [100, 1000, 10000]: + value = 10 ** exp + log10 = math.log10(value) + self.assertAlmostEqual(log10, exp) -def test_mixed_compares(): - import math - import sys + # log10(value) == exp, so log(value) == log10(value)/log10(e) == + # exp/LOG10E + expected = exp / LOG10E + log = math.log(value) + self.assertAlmostEqual(log, expected) - if verbose: - print "mixed comparisons" + for bad in -(1L << 10000), -2L, 0L: + self.assertRaises(ValueError, math.log, bad) + self.assertRaises(ValueError, math.log10, bad) - # We're mostly concerned with that mixing floats and longs does the - # right stuff, even when longs are too large to fit in a float. - # The safest way to check the results is to use an entirely different - # method, which we do here via a skeletal rational class (which - # represents all Python ints, longs and floats exactly). - class Rat: - def __init__(self, value): - if isinstance(value, (int, long)): - self.n = value - self.d = 1 + def test_mixed_compares(self): + eq = self.assertEqual + import math + import sys - elif isinstance(value, float): - # Convert to exact rational equivalent. - f, e = math.frexp(abs(value)) - assert f == 0 or 0.5 <= f < 1.0 - # |value| = f * 2**e exactly + # We're mostly concerned with that mixing floats and longs does the + # right stuff, even when longs are too large to fit in a float. 
+ # The safest way to check the results is to use an entirely different + # method, which we do here via a skeletal rational class (which + # represents all Python ints, longs and floats exactly). + class Rat: + def __init__(self, value): + if isinstance(value, (int, long)): + self.n = value + self.d = 1 + elif isinstance(value, float): + # Convert to exact rational equivalent. + f, e = math.frexp(abs(value)) + assert f == 0 or 0.5 <= f < 1.0 + # |value| = f * 2**e exactly - # Suck up CHUNK bits at a time; 28 is enough so that we suck - # up all bits in 2 iterations for all known binary double- - # precision formats, and small enough to fit in an int. - CHUNK = 28 - top = 0 - # invariant: |value| = (top + f) * 2**e exactly - while f: - f = math.ldexp(f, CHUNK) - digit = int(f) - assert digit >> CHUNK == 0 - top = (top << CHUNK) | digit - f -= digit - assert 0.0 <= f < 1.0 - e -= CHUNK + # Suck up CHUNK bits at a time; 28 is enough so that we suck + # up all bits in 2 iterations for all known binary double- + # precision formats, and small enough to fit in an int. + CHUNK = 28 + top = 0 + # invariant: |value| = (top + f) * 2**e exactly + while f: + f = math.ldexp(f, CHUNK) + digit = int(f) + assert digit >> CHUNK == 0 + top = (top << CHUNK) | digit + f -= digit + assert 0.0 <= f < 1.0 + e -= CHUNK - # Now |value| = top * 2**e exactly. - if e >= 0: - n = top << e - d = 1 + # Now |value| = top * 2**e exactly. 
+ if e >= 0: + n = top << e + d = 1 + else: + n = top + d = 1 << -e + if value < 0: + n = -n + self.n = n + self.d = d + assert float(n) / float(d) == value else: - n = top - d = 1 << -e - if value < 0: - n = -n - self.n = n - self.d = d - assert float(n) / float(d) == value - - else: - raise TypeError("can't deal with %r" % val) + raise TypeError("can't deal with %r" % val) - def __cmp__(self, other): - if not isinstance(other, Rat): - other = Rat(other) - return cmp(self.n * other.d, self.d * other.n) + def __cmp__(self, other): + if not isinstance(other, Rat): + other = Rat(other) + return cmp(self.n * other.d, self.d * other.n) - cases = [0, 0.001, 0.99, 1.0, 1.5, 1e20, 1e200] - # 2**48 is an important boundary in the internals. 2**53 is an - # important boundary for IEEE double precision. - for t in 2.0**48, 2.0**50, 2.0**53: - cases.extend([t - 1.0, t - 0.3, t, t + 0.3, t + 1.0, - long(t-1), long(t), long(t+1)]) - cases.extend([0, 1, 2, sys.maxint, float(sys.maxint)]) - # 1L<<20000 should exceed all double formats. long(1e200) is to - # check that we get equality with 1e200 above. - t = long(1e200) - cases.extend([0L, 1L, 2L, 1L << 20000, t-1, t, t+1]) - cases.extend([-x for x in cases]) - for x in cases: - Rx = Rat(x) - for y in cases: - Ry = Rat(y) - Rcmp = cmp(Rx, Ry) - xycmp = cmp(x, y) - if Rcmp != xycmp: - raise TestFailed('%r %r %d %d' % (x, y, Rcmp, xycmp)) - if (x == y) != (Rcmp == 0): - raise TestFailed('%r == %r %d' % (x, y, Rcmp)) - if (x != y) != (Rcmp != 0): - raise TestFailed('%r != %r %d' % (x, y, Rcmp)) - if (x < y) != (Rcmp < 0): - raise TestFailed('%r < %r %d' % (x, y, Rcmp)) - if (x <= y) != (Rcmp <= 0): - raise TestFailed('%r <= %r %d' % (x, y, Rcmp)) - if (x > y) != (Rcmp > 0): - raise TestFailed('%r > %r %d' % (x, y, Rcmp)) - if (x >= y) != (Rcmp >= 0): - raise TestFailed('%r >= %r %d' % (x, y, Rcmp)) + cases = [0, 0.001, 0.99, 1.0, 1.5, 1e20, 1e200] + # 2**48 is an important boundary in the internals. 
2**53 is an + # important boundary for IEEE double precision. + for t in 2.0**48, 2.0**50, 2.0**53: + cases.extend([t - 1.0, t - 0.3, t, t + 0.3, t + 1.0, + long(t-1), long(t), long(t+1)]) + cases.extend([0, 1, 2, sys.maxint, float(sys.maxint)]) + # 1L<<20000 should exceed all double formats. long(1e200) is to + # check that we get equality with 1e200 above. + t = long(1e200) + cases.extend([0L, 1L, 2L, 1L << 20000, t-1, t, t+1]) + cases.extend([-x for x in cases]) + for x in cases: + Rx = Rat(x) + for y in cases: + Ry = Rat(y) + Rcmp = cmp(Rx, Ry) + xycmp = cmp(x, y) + eq(Rcmp, xycmp, Frm("%r %r %d %d", x, y, Rcmp, xycmp)) + eq(x == y, Rcmp == 0, Frm("%r == %r %d", x, y, Rcmp)) + eq(x != y, Rcmp != 0, Frm("%r != %r %d", x, y, Rcmp)) + eq(x < y, Rcmp < 0, Frm("%r < %r %d", x, y, Rcmp)) + eq(x <= y, Rcmp <= 0, Frm("%r <= %r %d", x, y, Rcmp)) + eq(x > y, Rcmp > 0, Frm("%r > %r %d", x, y, Rcmp)) + eq(x >= y, Rcmp >= 0, Frm("%r >= %r %d", x, y, Rcmp)) -# ---------------------------------------------------------------- do it +def test_main(): + test_support.run_unittest(LongTest) -test_division() -test_karatsuba() -test_bitop_identities() -test_format() -test_misc() -test_auto_overflow() -test_float_overflow() -test_logs() -test_mixed_compares() +if __name__ == "__main__": + test_main() Index: test_macfs.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_macfs.py,v retrieving revision 1.7.4.1 retrieving revision 1.7.4.2 diff -u -d -r1.7.4.1 -r1.7.4.2 --- test_macfs.py 28 Apr 2003 17:28:36 -0000 1.7.4.1 +++ test_macfs.py 16 Oct 2005 05:24:01 -0000 1.7.4.2 @@ -48,9 +48,9 @@ import time fss = macfs.FSSpec(test_support.TESTFN) now = int(time.time()) - fss.SetDates(now, now-1, now-2) + fss.SetDates(now, now+1, now+2) dates = fss.GetDates() - self.assertEqual(dates, (now, now-1, now-2)) + self.assertEqual(dates, (now, now+1, now+2)) def test_ctor_type(self): fss = macfs.FSSpec(test_support.TESTFN) 
Index: test_marshal.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_marshal.py,v retrieving revision 1.1.16.2 retrieving revision 1.1.16.3 diff -u -d -r1.1.16.2 -r1.1.16.3 --- test_marshal.py 7 Jan 2005 06:59:09 -0000 1.1.16.2 +++ test_marshal.py 16 Oct 2005 05:24:01 -0000 1.1.16.3 @@ -73,7 +73,11 @@ n /= 123.4567 f = 0.0 - s = marshal.dumps(f) + s = marshal.dumps(f, 2) + got = marshal.loads(s) + self.assertEqual(f, got) + # and with version <= 1 (floats marshalled differently then) + s = marshal.dumps(f, 1) got = marshal.loads(s) self.assertEqual(f, got) @@ -81,12 +85,22 @@ while n < small: for expected in (-n, n): f = float(expected) + s = marshal.dumps(f) got = marshal.loads(s) self.assertEqual(f, got) + + s = marshal.dumps(f, 1) + got = marshal.loads(s) + self.assertEqual(f, got) + marshal.dump(f, file(test_support.TESTFN, "wb")) got = marshal.load(file(test_support.TESTFN, "rb")) self.assertEqual(f, got) + + marshal.dump(f, file(test_support.TESTFN, "wb"), 1) + got = marshal.load(file(test_support.TESTFN, "rb")) + self.assertEqual(f, got) n *= 123.4567 os.unlink(test_support.TESTFN) @@ -97,7 +111,7 @@ self.assertEqual(s, new) self.assertEqual(type(s), type(new)) marshal.dump(s, file(test_support.TESTFN, "wb")) - marshal.load(file(test_support.TESTFN, "rb")) + new = marshal.load(file(test_support.TESTFN, "rb")) self.assertEqual(s, new) self.assertEqual(type(s), type(new)) os.unlink(test_support.TESTFN) @@ -108,7 +122,7 @@ self.assertEqual(s, new) self.assertEqual(type(s), type(new)) marshal.dump(s, file(test_support.TESTFN, "wb")) - marshal.load(file(test_support.TESTFN, "rb")) + new = marshal.load(file(test_support.TESTFN, "rb")) self.assertEqual(s, new) self.assertEqual(type(s), type(new)) os.unlink(test_support.TESTFN) @@ -119,7 +133,7 @@ new = marshal.loads(marshal.dumps(b)) self.assertEqual(s, new) marshal.dump(b, file(test_support.TESTFN, "wb")) - 
marshal.load(file(test_support.TESTFN, "rb")) + new = marshal.load(file(test_support.TESTFN, "rb")) self.assertEqual(s, new) os.unlink(test_support.TESTFN) @@ -148,7 +162,7 @@ new = marshal.loads(marshal.dumps(self.d)) self.assertEqual(self.d, new) marshal.dump(self.d, file(test_support.TESTFN, "wb")) - marshal.load(file(test_support.TESTFN, "rb")) + new = marshal.load(file(test_support.TESTFN, "rb")) self.assertEqual(self.d, new) os.unlink(test_support.TESTFN) @@ -157,7 +171,7 @@ new = marshal.loads(marshal.dumps(lst)) self.assertEqual(lst, new) marshal.dump(lst, file(test_support.TESTFN, "wb")) - marshal.load(file(test_support.TESTFN, "rb")) + new = marshal.load(file(test_support.TESTFN, "rb")) self.assertEqual(lst, new) os.unlink(test_support.TESTFN) @@ -166,10 +180,22 @@ new = marshal.loads(marshal.dumps(t)) self.assertEqual(t, new) marshal.dump(t, file(test_support.TESTFN, "wb")) - marshal.load(file(test_support.TESTFN, "rb")) + new = marshal.load(file(test_support.TESTFN, "rb")) self.assertEqual(t, new) os.unlink(test_support.TESTFN) + def test_sets(self): + for constructor in (set, frozenset): + t = constructor(self.d.keys()) + new = marshal.loads(marshal.dumps(t)) + self.assertEqual(t, new) + self.assert_(isinstance(new, constructor)) + self.assertNotEqual(id(t), id(new)) + marshal.dump(t, file(test_support.TESTFN, "wb")) + new = marshal.load(file(test_support.TESTFN, "rb")) + self.assertEqual(t, new) + os.unlink(test_support.TESTFN) + class BugsTestCase(unittest.TestCase): def test_bug_5888452(self): # Simple-minded check for SF 588452: Debug build crashes @@ -185,6 +211,15 @@ self.assertEquals(marshal.loads(marshal.dumps(5, 0)), 5) self.assertEquals(marshal.loads(marshal.dumps(5, 1)), 5) + def test_fuzz(self): + # simple test that it's at least not *totally* trivial to + # crash from bad marshal data + for c in [chr(i) for i in range(256)]: + try: + marshal.loads(c) + except Exception: + pass + def test_main(): test_support.run_unittest(IntTestCase, 
FloatTestCase, Index: test_minidom.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_minidom.py,v retrieving revision 1.32.2.2 retrieving revision 1.32.2.3 diff -u -d -r1.32.2.2 -r1.32.2.3 --- test_minidom.py 7 Jan 2005 06:59:10 -0000 1.32.2.2 +++ test_minidom.py 16 Oct 2005 05:24:01 -0000 1.32.2.3 @@ -413,12 +413,19 @@ def testTextNodeRepr(): pass def testWriteXML(): - str = '\n' + str = '' dom = parseString(str) domstr = dom.toxml() dom.unlink() confirm(str == domstr) +def testAltNewline(): + str = '\n\n' + dom = parseString(str) + domstr = dom.toprettyxml(newl="\r\n") + dom.unlink() + confirm(domstr == str.replace("\n", "\r\n")) + def testProcessingInstruction(): dom = parseString('') pi = dom.documentElement.firstChild @@ -878,10 +885,19 @@ def testEncodings(): doc = parseString('') - confirm(doc.toxml() == u'\n\u20ac' - and doc.toxml('utf-8') == '\n\xe2\x82\xac' - and doc.toxml('iso-8859-15') == '\n\xa4', + confirm(doc.toxml() == u'\u20ac' + and doc.toxml('utf-8') == '\xe2\x82\xac' + and doc.toxml('iso-8859-15') == '\xa4', "testEncodings - encoding EURO SIGN") + + # Verify that character decoding errors throw exceptions instead of crashing + try: + doc = parseString('Comment \xe7a va ? 
Tr\xe8s bien ?') + except UnicodeDecodeError: + pass + else: + print 'parsing with bad encoding should raise a UnicodeDecodeError' + doc.unlink() class UserDataHandler: @@ -1199,7 +1215,7 @@ and not a1.isId and a2.isId and not a3.isId) - # renaming an attribute should not affect it's ID-ness: + # renaming an attribute should not affect its ID-ness: doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") confirm(e.isSameNode(doc.getElementById("w")) and a2.isId) @@ -1235,7 +1251,7 @@ confirm(a2.isId) confirm(not a3.isId) confirm(doc.getElementById("v") is None) - # renaming an attribute should not affect it's ID-ness: + # renaming an attribute should not affect its ID-ness: doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") confirm(e.isSameNode(doc.getElementById("w")) and a2.isId) @@ -1271,7 +1287,7 @@ confirm(a2.isId) confirm(not a3.isId) confirm(doc.getElementById("v") is None) - # renaming an attribute should not affect it's ID-ness: + # renaming an attribute should not affect its ID-ness: doc.renameNode(a2, xml.dom.EMPTY_NAMESPACE, "an") confirm(e.isSameNode(doc.getElementById("w")) and a2.isId) Index: test_mmap.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_mmap.py,v retrieving revision 1.22.2.1 retrieving revision 1.22.2.2 diff -u -d -r1.22.2.1 -r1.22.2.2 --- test_mmap.py 28 Apr 2003 17:28:32 -0000 1.22.2.1 +++ test_mmap.py 16 Oct 2005 05:24:01 -0000 1.22.2.2 @@ -120,6 +120,14 @@ else: verify(0, 'Could seek beyond the new size') + # Check that the underlying file is truncated too + # (bug #728515) + f = open(TESTFN) + f.seek(0, 2) + verify(f.tell() == 512, 'Underlying file not truncated') + f.close() + verify(m.size() == 512, 'New size not reflected in file') + m.close() finally: @@ -311,6 +319,42 @@ finally: os.unlink(TESTFN) + # test mapping of entire file by passing 0 for map length + if hasattr(os, "stat"): + print " Ensuring that passing 0 as map length sets map size to current file 
size." + f = open(TESTFN, "w+") + + try: + f.write(2**16 * 'm') # Arbitrary character + f.close() + + f = open(TESTFN, "rb+") + mf = mmap.mmap(f.fileno(), 0) + verify(len(mf) == 2**16, "Map size should equal file size.") + vereq(mf.read(2**16), 2**16 * "m") + mf.close() + f.close() + + finally: + os.unlink(TESTFN) + + # test mapping of entire file by passing 0 for map length + if hasattr(os, "stat"): + print " Ensuring that passing 0 as map length sets map size to current file size." + f = open(TESTFN, "w+") + try: + f.write(2**16 * 'm') # Arbitrary character + f.close() + + f = open(TESTFN, "rb+") + mf = mmap.mmap(f.fileno(), 0) + verify(len(mf) == 2**16, "Map size should equal file size.") + vereq(mf.read(2**16), 2**16 * "m") + mf.close() + f.close() + + finally: + os.unlink(TESTFN) print ' Test passed' Index: test_mutants.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_mutants.py,v retrieving revision 1.6.22.1 retrieving revision 1.6.22.2 diff -u -d -r1.6.22.1 -r1.6.22.2 --- test_mutants.py 28 Apr 2003 17:28:31 -0000 1.6.22.1 +++ test_mutants.py 16 Oct 2005 05:24:01 -0000 1.6.22.2 @@ -69,14 +69,12 @@ elif keys: # Delete a key at random. + mutate = 0 # disable mutation until key deleted i = random.randrange(len(keys)) key = keys[i] del target[key] - # CAUTION: don't use keys.remove(key) here. Or do . The - # point is that .remove() would trigger more comparisons, and so - # also more calls to this routine. We're mutating often enough - # without that. del keys[i] + mutate = 1 # A horrid class that triggers random mutations of dict1 and dict2 when # instances are compared. 
Index: test_new.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_new.py,v retrieving revision 1.14.8.2 retrieving revision 1.14.8.3 diff -u -d -r1.14.8.2 -r1.14.8.3 --- test_new.py 7 Jan 2005 06:59:10 -0000 1.14.8.2 +++ test_new.py 16 Oct 2005 05:24:01 -0000 1.14.8.3 @@ -47,6 +47,16 @@ verify(c.get_yolks() == 1 and c.get_more_yolks() == 4, 'Broken call of hand-crafted instance method') +im = new.instancemethod(break_yolks, c) +im() +verify(c.get_yolks() == -1) +try: + new.instancemethod(break_yolks, None) +except TypeError: + pass +else: + raise TestFailed, "dangerous instance method creation allowed" + # It's unclear what the semantics should be for a code object compiled at # module scope, but bound and run in a function. In CPython, `c' is global # (by accident?) while in Jython, `c' is local. The intent of the test Index: test_operator.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_operator.py,v retrieving revision 1.8.14.2 retrieving revision 1.8.14.3 diff -u -d -r1.8.14.2 -r1.8.14.3 --- test_operator.py 7 Jan 2005 06:59:10 -0000 1.8.14.2 +++ test_operator.py 16 Oct 2005 05:24:01 -0000 1.8.14.3 @@ -324,7 +324,14 @@ f = operator.attrgetter(2) self.assertRaises(TypeError, f, a) self.assertRaises(TypeError, operator.attrgetter) - self.assertRaises(TypeError, operator.attrgetter, 1, 2) + + # multiple gets + record = A() + record.x = 'X' + record.y = 'Y' + record.z = 'Z' + self.assertEqual(operator.attrgetter('x','z','y')(record), ('X', 'Z', 'Y')) + self.assertRaises(TypeError, operator.attrgetter('x', (), 'y'), record) class C(object): def __getattr(self, name): @@ -346,7 +353,6 @@ f = operator.itemgetter('name') self.assertRaises(TypeError, f, a) self.assertRaises(TypeError, operator.itemgetter) - self.assertRaises(TypeError, operator.itemgetter, 1, 2) d = dict(key='val') f = operator.itemgetter('key') @@ 
-361,9 +367,29 @@ self.assertEqual(sorted(inventory, key=getcount), [('orange', 1), ('banana', 2), ('apple', 3), ('pear', 5)]) -def test_main(): - test_support.run_unittest(OperatorTestCase) + # multiple gets + data = map(str, range(20)) + self.assertEqual(operator.itemgetter(2,10,5)(data), ('2', '10', '5')) + self.assertRaises(TypeError, operator.itemgetter(2, 'x', 5), data) + + +def test_main(verbose=None): + import sys + test_classes = ( + OperatorTestCase, + ) + test_support.run_unittest(*test_classes) + + # verify reference counting + if verbose and hasattr(sys, "gettotalrefcount"): + import gc + counts = [None] * 5 + for i in xrange(len(counts)): + test_support.run_unittest(*test_classes) + gc.collect() + counts[i] = sys.gettotalrefcount() + print counts if __name__ == "__main__": - test_main() + test_main(verbose=True) Index: test_os.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_os.py,v retrieving revision 1.11.2.2 retrieving revision 1.11.2.3 diff -u -d -r1.11.2.2 -r1.11.2.3 --- test_os.py 7 Jan 2005 06:59:10 -0000 1.11.2.2 +++ test_os.py 16 Oct 2005 05:24:01 -0000 1.11.2.3 @@ -111,7 +111,11 @@ for name in dir(stat): if name[:3] == 'ST_': attr = name.lower() - self.assertEquals(getattr(result, attr), + if name.endswith("TIME"): + def trunc(x): return int(x) + else: + def trunc(x): return x + self.assertEquals(trunc(getattr(result, attr)), result[getattr(stat, name)]) self.assert_(attr in members) @@ -222,6 +226,13 @@ os.environ.clear() os.environ.update(self.__save) + # Bug 1110478 + def test_update2(self): + if os.path.exists("/bin/sh"): + os.environ.update(HELLO="World") + value = os.popen("/bin/sh -c 'echo $HELLO'").read().strip() + self.assertEquals(value, "World") + class WalkTests(unittest.TestCase): """Tests for os.walk().""" Index: test_ossaudiodev.py =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Lib/test/test_ossaudiodev.py,v retrieving revision 1.4.8.2 retrieving revision 1.4.8.3 diff -u -d -r1.4.8.2 -r1.4.8.3 --- test_ossaudiodev.py 7 Jan 2005 06:59:10 -0000 1.4.8.2 +++ test_ossaudiodev.py 16 Oct 2005 05:24:01 -0000 1.4.8.3 @@ -56,6 +56,19 @@ dsp.getptr() dsp.fileno() + # Make sure the read-only attributes work. + assert dsp.closed is False, "dsp.closed is not False" + assert dsp.name == "/dev/dsp" + assert dsp.mode == 'w', "bad dsp.mode: %r" % dsp.mode + + # And make sure they're really read-only. + for attr in ('closed', 'name', 'mode'): + try: + setattr(dsp, attr, 42) + raise RuntimeError("dsp.%s not read-only" % attr) + except TypeError: + pass + # set parameters based on .au file headers dsp.setparameters(AFMT_S16_NE, nchannels, rate) t1 = time.time() @@ -65,9 +78,7 @@ t2 = time.time() print "elapsed time: %.1f sec" % (t2-t1) -def test_setparameters(): - dsp = ossaudiodev.open("w") - +def test_setparameters(dsp): # Two configurations for testing: # config1 (8-bit, mono, 8 kHz) should work on even the most # ancient and crufty sound card, but maybe not on special- @@ -96,11 +107,16 @@ assert result == (fmt, channels, rate), \ "setparameters%r: returned %r" % (config + result) +def test_bad_setparameters(dsp): + # Now try some configurations that are presumably bogus: eg. 300 # channels currently exceeds even Hollywood's ambitions, and # negative sampling rate is utter nonsense. setparameters() should # accept these in non-strict mode, returning something other than # was requested, but should barf in strict mode. 
+ fmt = AFMT_S16_NE + rate = 44100 + channels = 2 for config in [(fmt, 300, rate), # ridiculous nchannels (fmt, -5, rate), # impossible nchannels (fmt, channels, -50), # impossible rate @@ -119,6 +135,16 @@ def test(): (data, rate, ssize, nchannels) = read_sound_file(findfile('audiotest.au')) play_sound_file(data, rate, ssize, nchannels) - test_setparameters() + + dsp = ossaudiodev.open("w") + try: + test_setparameters(dsp) + + # Disabled because it fails under Linux 2.6 with ALSA's OSS + # emulation layer. + #test_bad_setparameters(dsp) + finally: + dsp.close() + assert dsp.closed is True, "dsp.closed is not True" test() Index: test_parser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_parser.py,v retrieving revision 1.11.2.3 retrieving revision 1.11.2.4 diff -u -d -r1.11.2.3 -r1.11.2.4 --- test_parser.py 11 Oct 2005 21:37:28 -0000 1.11.2.3 +++ test_parser.py 16 Oct 2005 05:24:01 -0000 1.11.2.4 @@ -29,11 +29,22 @@ def test_yield_statement(self): self.check_suite("def f(): yield 1") + self.check_suite("def f(): yield") + self.check_suite("def f(): x += yield") + self.check_suite("def f(): x = yield 1") + self.check_suite("def f(): x = y = yield 1") + self.check_suite("def f(): x = yield") + self.check_suite("def f(): x = y = yield") + self.check_suite("def f(): 1 + (yield)*2") + self.check_suite("def f(): (yield 1)*2") self.check_suite("def f(): return; yield 1") self.check_suite("def f(): yield 1; return") self.check_suite("def f():\n" " for x in range(30):\n" " yield x\n") + self.check_suite("def f():\n" + " if (yield):\n" + " yield x\n") def test_expressions(self): self.check_expr("foo(1)") @@ -127,6 +138,9 @@ self.check_suite("@funcattrs()\n" "def f(): pass") + def test_class_defs(self): + self.check_suite("class foo():pass") + def test_import_from_statement(self): self.check_suite("from sys.path import *") self.check_suite("from sys.path import dirname") Index: test_popen2.py 
=================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_popen2.py,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- test_popen2.py 7 Jan 2005 06:59:10 -0000 1.6.2.2 +++ test_popen2.py 16 Oct 2005 05:24:01 -0000 1.6.2.3 @@ -62,7 +62,7 @@ raise ValueError("wrote %r read %r" % (teststr, got)) got = e.read() if got: - raise ValueError("unexected %r on stderr" % (got,)) + raise ValueError("unexpected %r on stderr" % (got,)) for inst in popen2._active[:]: inst.wait() if popen2._active: Index: test_posix.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_posix.py,v retrieving revision 1.5.6.2 retrieving revision 1.5.6.3 diff -u -d -r1.5.6.2 -r1.5.6.3 --- test_posix.py 7 Jan 2005 06:59:10 -0000 1.5.6.2 +++ test_posix.py 16 Oct 2005 05:24:01 -0000 1.5.6.3 @@ -94,6 +94,37 @@ self.fdopen_helper('r') self.fdopen_helper('r', 100) + def test_osexlock(self): + if hasattr(posix, "O_EXLOCK"): + fd = os.open(test_support.TESTFN, + os.O_WRONLY|os.O_EXLOCK|os.O_CREAT) + self.assertRaises(OSError, os.open, test_support.TESTFN, + os.O_WRONLY|os.O_EXLOCK|os.O_NONBLOCK) + os.close(fd) + + if hasattr(posix, "O_SHLOCK"): + fd = os.open(test_support.TESTFN, + os.O_WRONLY|os.O_SHLOCK|os.O_CREAT) + self.assertRaises(OSError, os.open, test_support.TESTFN, + os.O_WRONLY|os.O_EXLOCK|os.O_NONBLOCK) + os.close(fd) + + def test_osshlock(self): + if hasattr(posix, "O_SHLOCK"): + fd1 = os.open(test_support.TESTFN, + os.O_WRONLY|os.O_SHLOCK|os.O_CREAT) + fd2 = os.open(test_support.TESTFN, + os.O_WRONLY|os.O_SHLOCK|os.O_CREAT) + os.close(fd2) + os.close(fd1) + + if hasattr(posix, "O_EXLOCK"): + fd = os.open(test_support.TESTFN, + os.O_WRONLY|os.O_SHLOCK|os.O_CREAT) + self.assertRaises(OSError, os.open, test_support.TESTFN, + os.O_RDONLY|os.O_EXLOCK|os.O_NONBLOCK) + os.close(fd) + def test_fstat(self): if hasattr(posix, 
'fstat'): fp = open(test_support.TESTFN) Index: test_posixpath.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_posixpath.py,v retrieving revision 1.4.26.2 retrieving revision 1.4.26.3 diff -u -d -r1.4.26.2 -r1.4.26.3 --- test_posixpath.py 7 Jan 2005 06:59:10 -0000 1.4.26.2 +++ test_posixpath.py 16 Oct 2005 05:24:01 -0000 1.4.26.3 @@ -477,6 +477,26 @@ self.safe_rmdir(ABSTFN + "/k") self.safe_rmdir(ABSTFN) + def test_realpath_resolve_first(self): + # Bug #1213894: The first component of the path, if not absolute, + # must be resolved too. + + try: + old_path = abspath('.') + os.mkdir(ABSTFN) + os.mkdir(ABSTFN + "/k") + os.symlink(ABSTFN, ABSTFN + "link") + os.chdir(dirname(ABSTFN)) + + base = basename(ABSTFN) + self.assertEqual(realpath(base + "link"), ABSTFN) + self.assertEqual(realpath(base + "link/k"), ABSTFN + "/k") + finally: + os.chdir(old_path) + self.safe_remove(ABSTFN + "link") + self.safe_rmdir(ABSTFN + "/k") + self.safe_rmdir(ABSTFN) + # Convenience functions for removing temporary files. def pass_os_error(self, func, filename): try: func(filename) Index: test_profile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_profile.py,v retrieving revision 1.2.14.1 retrieving revision 1.2.14.2 diff -u -d -r1.2.14.1 -r1.2.14.2 --- test_profile.py 7 Jan 2005 06:59:10 -0000 1.2.14.1 +++ test_profile.py 16 Oct 2005 05:24:01 -0000 1.2.14.2 @@ -10,7 +10,7 @@ # included in the profile and would appear to consume all the time.) 
ticks = 0 -def test_main(): +def test_1(): global ticks ticks = 0 prof = profile.Profile(timer) @@ -95,6 +95,25 @@ vereq (x, 1) os.unlink (TESTFN) +def test_3(): + result = [] + def testfunc1(): + try: len(None) + except: pass + try: len(None) + except: pass + result.append(True) + def testfunc2(): + testfunc1() + testfunc1() + profile.runctx("testfunc2()", locals(), locals(), TESTFN) + vereq(result, [True, True]) + os.unlink(TESTFN) + +def test_main(): + test_1() + test_2() + test_3() + if __name__ == "__main__": test_main() - test_2() Index: test_re.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_re.py,v retrieving revision 1.31.18.2 retrieving revision 1.31.18.3 diff -u -d -r1.31.18.2 -r1.31.18.3 --- test_re.py 7 Jan 2005 06:59:10 -0000 1.31.18.2 +++ test_re.py 16 Oct 2005 05:24:01 -0000 1.31.18.3 @@ -235,6 +235,16 @@ self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), ('a', '')) + # Tests for bug #1177831: exercise groups other than the first group + p = re.compile('(?Pa)(?Pb)?((?(g2)c|d))') + self.assertEqual(p.match('abc').groups(), + ('a', 'b', 'c')) + self.assertEqual(p.match('ad').groups(), + ('a', None, 'd')) + self.assertEqual(p.match('abd'), None) + self.assertEqual(p.match('ac'), None) + + def test_re_groupref(self): self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), ('|', 'a')) @@ -287,6 +297,9 @@ self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None) self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None) + self.assertEqual(re.match("^x{}$", "xxx"), None) + self.assertNotEqual(re.match("^x{}$", "x{}"), None) + def test_getattr(self): self.assertEqual(re.match("(a)", "a").pos, 0) self.assertEqual(re.match("(a)", "a").endpos, 1) Index: test_richcmp.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_richcmp.py,v retrieving revision 1.6.20.2 retrieving revision 1.6.20.3 
diff -u -d -r1.6.20.2 -r1.6.20.3 --- test_richcmp.py 7 Jan 2005 06:59:10 -0000 1.6.20.2 +++ test_richcmp.py 16 Oct 2005 05:24:01 -0000 1.6.20.3 @@ -259,8 +259,8 @@ def test_dicts(self): # Verify that __eq__ and __ne__ work for dicts even if the keys and - # values don't support anything other than __eq__ and __ne__. Complex - # numbers are a fine example of that. + # values don't support anything other than __eq__ and __ne__ (and + # __hash__). Complex numbers are a fine example of that. import random imag1a = {} for i in range(50): Index: test_set.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_set.py,v retrieving revision 1.16.4.1 retrieving revision 1.16.4.2 diff -u -d -r1.16.4.1 -r1.16.4.2 --- test_set.py 7 Jan 2005 06:59:10 -0000 1.16.4.1 +++ test_set.py 16 Oct 2005 05:24:01 -0000 1.16.4.2 @@ -5,6 +5,8 @@ import copy import pickle import os +from random import randrange, shuffle +import sys class PassThru(Exception): pass @@ -13,6 +15,12 @@ raise PassThru yield 1 +class BadCmp: + def __hash__(self): + return 1 + def __cmp__(self, other): + raise RuntimeError + class TestJointOps(unittest.TestCase): # Tests common to both set and frozenset @@ -202,6 +210,40 @@ self.assertNotEqual(id(t), id(newt)) self.assertEqual(t.value + 1, newt.value) + def test_gc(self): + # Create a nest of cycles to exercise overall ref count check + class A: + pass + s = set(A() for i in xrange(1000)) + for elem in s: + elem.cycle = s + elem.sub = elem + elem.set = set([elem]) + + def test_subclass_with_custom_hash(self): + # Bug #1257731 + class H(self.thetype): + def __hash__(self): + return id(self) + s=H() + f=set() + f.add(s) + self.assert_(s in f) + f.remove(s) + f.add(s) + f.discard(s) + + def test_badcmp(self): + s = self.thetype([BadCmp()]) + # Detect comparison errors during insertion and lookup + self.assertRaises(RuntimeError, self.thetype, [BadCmp(), BadCmp()]) + self.assertRaises(RuntimeError, 
s.__contains__, BadCmp()) + # Detect errors during mutating operations + if hasattr(s, 'add'): + self.assertRaises(RuntimeError, s.add, BadCmp()) + self.assertRaises(RuntimeError, s.discard, BadCmp()) + self.assertRaises(RuntimeError, s.remove, BadCmp()) + class TestSet(TestJointOps): thetype = set @@ -359,6 +401,18 @@ else: self.assert_(c not in self.s) + def test_inplace_on_self(self): + t = self.s.copy() + t |= t + self.assertEqual(t, self.s) + t &= t + self.assertEqual(t, self.s) + t -= t + self.assertEqual(t, self.thetype()) + t = self.s.copy() + t ^= t + self.assertEqual(t, self.thetype()) + def test_weakref(self): s = self.thetype('gallahad') p = proxy(s) @@ -366,6 +420,11 @@ s = None self.assertRaises(ReferenceError, str, p) + # C API test only available in a debug build + if hasattr(sys, "gettotalrefcount"): + def test_c_api(self): + self.assertEqual(set('abc').test_c_api(), True) + class SetSubclass(set): pass @@ -380,6 +439,15 @@ s.__init__(self.otherword) self.assertEqual(s, set(self.word)) + def test_singleton_empty_frozenset(self): + f = frozenset() + efs = [frozenset(), frozenset([]), frozenset(()), frozenset(''), + frozenset(), frozenset([]), frozenset(()), frozenset(''), + frozenset(xrange(0)), frozenset(frozenset()), + frozenset(f), f] + # All of the empty frozensets should have just one id() + self.assertEqual(len(set(map(id, efs))), 1) + def test_constructor_identity(self): s = self.thetype(range(3)) t = self.thetype(s) @@ -389,6 +457,15 @@ self.assertEqual(hash(self.thetype('abcdeb')), hash(self.thetype('ebecda'))) + # make sure that all permutations give the same hash value + n = 100 + seq = [randrange(n) for i in xrange(n)] + results = set() + for i in xrange(200): + shuffle(seq) + results.add(hash(self.thetype(seq))) + self.assertEqual(len(results), 1) + def test_copy(self): dup = self.s.copy() self.assertEqual(id(self.s), id(dup)) @@ -436,6 +513,17 @@ t = self.thetype(s) self.assertEqual(s, t) + def test_singleton_empty_frozenset(self): + 
Frozenset = self.thetype + f = frozenset() + F = Frozenset() + efs = [Frozenset(), Frozenset([]), Frozenset(()), Frozenset(''), + Frozenset(), Frozenset([]), Frozenset(()), Frozenset(''), + Frozenset(xrange(0)), Frozenset(Frozenset()), + Frozenset(frozenset()), f, F, Frozenset(f), Frozenset(F)] + # All empty frozenset subclass instances should have different ids + self.assertEqual(len(set(map(id, efs))), len(efs)) + # Tests taken from test_sets.py ============================================= empty_set = set() @@ -1307,7 +1395,6 @@ #============================================================================== def test_main(verbose=None): - import sys from test import test_sets test_classes = ( TestSet, Index: test_sets.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_sets.py,v retrieving revision 1.23.6.2 retrieving revision 1.23.6.3 diff -u -d -r1.23.6.2 -r1.23.6.3 --- test_sets.py 7 Jan 2005 06:59:11 -0000 1.23.6.2 +++ test_sets.py 16 Oct 2005 05:24:01 -0000 1.23.6.3 @@ -243,6 +243,19 @@ self.assertRaises(TypeError, cmp, a, 12) self.assertRaises(TypeError, cmp, "abc", a) + def test_inplace_on_self(self): + t = self.set.copy() + t |= t + self.assertEqual(t, self.set) + t &= t + self.assertEqual(t, self.set) + t -= t + self.assertEqual(len(t), 0) + t = self.set.copy() + t ^= t + self.assertEqual(len(t), 0) + + #============================================================================== class TestUpdateOps(unittest.TestCase): Index: test_site.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_site.py,v retrieving revision 1.6.6.1 retrieving revision 1.6.6.2 diff -u -d -r1.6.6.1 -r1.6.6.2 --- test_site.py 7 Jan 2005 06:59:12 -0000 1.6.6.1 +++ test_site.py 16 Oct 2005 05:24:01 -0000 1.6.6.2 @@ -117,7 +117,7 @@ Make sure to call self.cleanup() to undo anything done by this method. 
""" - FILE = open(self.file_path, 'wU') + FILE = open(self.file_path, 'w') try: print>>FILE, "#import @bad module name" print>>FILE, "\n" Index: test_socket.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_socket.py,v retrieving revision 1.39.2.2 retrieving revision 1.39.2.3 diff -u -d -r1.39.2.2 -r1.39.2.3 --- test_socket.py 7 Jan 2005 06:59:12 -0000 1.39.2.2 +++ test_socket.py 16 Oct 2005 05:24:01 -0000 1.39.2.3 @@ -267,7 +267,7 @@ except socket.error: # Probably a similar problem as above; skip this test return - all_host_names = [hname] + aliases + all_host_names = [hostname, hname] + aliases fqhn = socket.getfqdn() if not fqhn in all_host_names: self.fail("Error testing host resolution mechanisms.") @@ -312,7 +312,7 @@ # I've ordered this by protocols that have both a tcp and udp # protocol, at least for modern Linuxes. if sys.platform in ('linux2', 'freebsd4', 'freebsd5', 'freebsd6', - 'darwin'): + 'freebsd7', 'darwin'): # avoid the 'echo' service on this platform, as there is an # assumption breaking non-standard port/protocol entry services = ('daytime', 'qotd', 'domain') @@ -380,10 +380,12 @@ self.assertEquals('\xff\x00\xff\x00', f('255.0.255.0')) self.assertEquals('\xaa\xaa\xaa\xaa', f('170.170.170.170')) self.assertEquals('\x01\x02\x03\x04', f('1.2.3.4')) + self.assertEquals('\xff\xff\xff\xff', f('255.255.255.255')) self.assertEquals('\x00\x00\x00\x00', g('0.0.0.0')) self.assertEquals('\xff\x00\xff\x00', g('255.0.255.0')) self.assertEquals('\xaa\xaa\xaa\xaa', g('170.170.170.170')) + self.assertEquals('\xff\xff\xff\xff', g('255.255.255.255')) def testIPv6toString(self): if not hasattr(socket, 'inet_pton'): Index: test_str.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_str.py,v retrieving revision 1.2.4.2 retrieving revision 1.2.4.3 diff -u -d -r1.2.4.2 -r1.2.4.3 --- test_str.py 7 Jan 2005 06:59:12 
-0000 1.2.4.2 +++ test_str.py 16 Oct 2005 05:24:01 -0000 1.2.4.3 @@ -19,6 +19,69 @@ string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) self.assertRaises(OverflowError, '%c'.__mod__, 0x1234) + def test_conversion(self): + # Make sure __str__() behaves properly + class Foo0: + def __unicode__(self): + return u"foo" + + class Foo1: + def __str__(self): + return "foo" + + class Foo2(object): + def __str__(self): + return "foo" + + class Foo3(object): + def __str__(self): + return u"foo" + + class Foo4(unicode): + def __str__(self): + return u"foo" + + class Foo5(str): + def __str__(self): + return u"foo" + + class Foo6(str): + def __str__(self): + return "foos" + + def __unicode__(self): + return u"foou" + + class Foo7(unicode): + def __str__(self): + return "foos" + def __unicode__(self): + return u"foou" + + class Foo8(str): + def __new__(cls, content=""): + return str.__new__(cls, 2*content) + def __str__(self): + return self + + class Foo9(str): + def __str__(self): + return "string" + def __unicode__(self): + return "not unicode" + + self.assert_(str(Foo0()).startswith("<")) # this is different from __unicode__ + self.assertEqual(str(Foo1()), "foo") + self.assertEqual(str(Foo2()), "foo") + self.assertEqual(str(Foo3()), "foo") + self.assertEqual(str(Foo4("bar")), "foo") + self.assertEqual(str(Foo5("bar")), "foo") + self.assertEqual(str(Foo6("bar")), "foos") + self.assertEqual(str(Foo7("bar")), "foos") + self.assertEqual(str(Foo8("foo")), "foofoo") + self.assertEqual(str(Foo9("foo")), "string") + self.assertEqual(unicode(Foo9("foo")), u"not unicode") + def test_main(): test_support.run_unittest(StrTest) Index: test_strptime.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_strptime.py,v retrieving revision 1.11.4.2 retrieving revision 1.11.4.3 diff -u -d -r1.11.4.2 -r1.11.4.3 --- test_strptime.py 7 Jan 2005 06:59:12 -0000 1.11.4.2 +++ test_strptime.py 16 Oct 2005 05:24:01 
-0000 1.11.4.3 @@ -462,10 +462,12 @@ # Make sure cache is recreated when current locale does not match what # cached object was created with. _strptime.strptime("10", "%d") + _strptime.strptime("2005", "%Y") _strptime._TimeRE_cache.locale_time.lang = "Ni" original_time_re = id(_strptime._TimeRE_cache) _strptime.strptime("10", "%d") self.failIfEqual(original_time_re, id(_strptime._TimeRE_cache)) + self.failUnlessEqual(len(_strptime._regex_cache), 1) def test_regex_cleanup(self): # Make sure cached regexes are discarded when cache becomes "full". Index: test_subprocess.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_subprocess.py,v retrieving revision 1.17.2.1 retrieving revision 1.17.2.2 diff -u -d -r1.17.2.1 -r1.17.2.2 --- test_subprocess.py 7 Jan 2005 06:59:12 -0000 1.17.2.1 +++ test_subprocess.py 16 Oct 2005 05:24:01 -0000 1.17.2.2 @@ -248,6 +248,31 @@ env=newenv) self.assertEqual(p.stdout.read(), "orange") + def test_communicate_stdin(self): + p = subprocess.Popen([sys.executable, "-c", + 'import sys; sys.exit(sys.stdin.read() == "pear")'], + stdin=subprocess.PIPE) + p.communicate("pear") + self.assertEqual(p.returncode, 1) + + def test_communicate_stdout(self): + p = subprocess.Popen([sys.executable, "-c", + 'import sys; sys.stdout.write("pineapple")'], + stdout=subprocess.PIPE) + (stdout, stderr) = p.communicate() + self.assertEqual(stdout, "pineapple") + self.assertEqual(stderr, None) + + def test_communicate_stderr(self): + p = subprocess.Popen([sys.executable, "-c", + 'import sys; sys.stderr.write("pineapple")'], + stderr=subprocess.PIPE) + (stdout, stderr) = p.communicate() + self.assertEqual(stdout, None) + # When running with a pydebug build, the # of references is outputted + # to stderr, so just check if stderr at least started with "pineapple" + self.assert_(stderr.startswith("pineapple")) + def test_communicate(self): p = subprocess.Popen([sys.executable, "-c", 'import 
sys,os;' \ @@ -359,9 +384,10 @@ def test_no_leaking(self): # Make sure we leak no resources - max_handles = 1026 # too much for most UNIX systems - if mswindows: - max_handles = 65 # a full test is too slow on Windows + if test_support.is_resource_enabled("subprocess") and not mswindows: + max_handles = 1026 # too much for most UNIX systems + else: + max_handles = 65 for i in range(max_handles): p = subprocess.Popen([sys.executable, "-c", "import sys;sys.stdout.write(sys.stdin.read())"], Index: test_sundry.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_sundry.py,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- test_sundry.py 7 Jan 2005 06:59:12 -0000 1.10.2.2 +++ test_sundry.py 16 Oct 2005 05:24:01 -0000 1.10.2.3 @@ -93,5 +93,4 @@ #import user import webbrowser import whichdb -import xdrlib import xml Index: test_support.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_support.py,v retrieving revision 1.41.2.2 retrieving revision 1.41.2.3 diff -u -d -r1.41.2.2 -r1.41.2.3 --- test_support.py 7 Jan 2005 06:59:12 -0000 1.41.2.2 +++ test_support.py 16 Oct 2005 05:24:01 -0000 1.41.2.3 @@ -144,7 +144,7 @@ TESTFN_UNICODE_UNENCODEABLE = None else: # Japanese characters (I think - from bug 846133) - TESTFN_UNICODE_UNENCODEABLE = u"@test-\u5171\u6709\u3055\u308c\u308b" + TESTFN_UNICODE_UNENCODEABLE = eval('u"@test-\u5171\u6709\u3055\u308c\u308b"') try: # XXX - Note - should be using TESTFN_ENCODING here - but for # Windows, "mbcs" currently always operates as if in Index: test_sys.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_sys.py,v retrieving revision 1.6.6.2 retrieving revision 1.6.6.3 diff -u -d -r1.6.6.2 -r1.6.6.3 --- test_sys.py 7 Jan 2005 06:59:12 -0000 1.6.6.2 +++ test_sys.py 16 Oct 
2005 05:24:01 -0000 1.6.6.3 @@ -161,6 +161,18 @@ else: self.fail("no exception") + # test that the exit machinery handles SystemExits properly + import subprocess + # both unnormalized... + rc = subprocess.call([sys.executable, "-c", + "raise SystemExit, 46"]) + self.assertEqual(rc, 46) + # ... and normalized + rc = subprocess.call([sys.executable, "-c", + "raise SystemExit(47)"]) + self.assertEqual(rc, 47) + + def test_getdefaultencoding(self): if test.test_support.have_unicode: self.assertRaises(TypeError, sys.getdefaultencoding, 42) @@ -235,7 +247,8 @@ self.assert_(isinstance(sys.executable, basestring)) self.assert_(isinstance(sys.hexversion, int)) self.assert_(isinstance(sys.maxint, int)) - self.assert_(isinstance(sys.maxunicode, int)) + if test.test_support.have_unicode: + self.assert_(isinstance(sys.maxunicode, int)) self.assert_(isinstance(sys.platform, basestring)) self.assert_(isinstance(sys.prefix, basestring)) self.assert_(isinstance(sys.version, basestring)) Index: test_tarfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_tarfile.py,v retrieving revision 1.7.4.2 retrieving revision 1.7.4.3 diff -u -d -r1.7.4.2 -r1.7.4.3 --- test_tarfile.py 7 Jan 2005 06:59:12 -0000 1.7.4.2 +++ test_tarfile.py 16 Oct 2005 05:24:01 -0000 1.7.4.3 @@ -91,6 +91,16 @@ self.assert_(lines1 == lines2, "_FileObject.readline() does not work correctly") + def test_iter(self): + # Test iteration over ExFileObject. + if self.sep != "|": + filename = "0-REGTYPE-TEXT" + self.tar.extract(filename, dirname()) + lines1 = file(os.path.join(dirname(), filename), "rU").readlines() + lines2 = [line for line in self.tar.extractfile(filename)] + self.assert_(lines1 == lines2, + "ExFileObject iteration does not work correctly") + def test_seek(self): """Test seek() method of _FileObject, incl. random reading. 
""" @@ -171,6 +181,18 @@ stream.close() +class ReadAsteriskTest(ReadTest): + + def setUp(self): + mode = self.mode + self.sep + "*" + self.tar = tarfile.open(tarname(self.comp), mode) + +class ReadStreamAsteriskTest(ReadStreamTest): + + def setUp(self): + mode = self.mode + self.sep + "*" + self.tar = tarfile.open(tarname(self.comp), mode) + class WriteTest(BaseTest): mode = 'w' @@ -208,6 +230,40 @@ else: self.dst.addfile(tarinfo, f) +class WriteSize0Test(BaseTest): + mode = 'w' + + def setUp(self): + self.tmpdir = dirname() + self.dstname = tmpname() + self.dst = tarfile.open(self.dstname, "w") + + def tearDown(self): + self.dst.close() + + def test_file(self): + path = os.path.join(self.tmpdir, "file") + file(path, "w") + tarinfo = self.dst.gettarinfo(path) + self.assertEqual(tarinfo.size, 0) + file(path, "w").write("aaa") + tarinfo = self.dst.gettarinfo(path) + self.assertEqual(tarinfo.size, 3) + + def test_directory(self): + path = os.path.join(self.tmpdir, "directory") + os.mkdir(path) + tarinfo = self.dst.gettarinfo(path) + self.assertEqual(tarinfo.size, 0) + + def test_symlink(self): + if hasattr(os, "symlink"): + path = os.path.join(self.tmpdir, "symlink") + os.symlink("link_target", path) + tarinfo = self.dst.gettarinfo(path) + self.assertEqual(tarinfo.size, 0) + + class WriteStreamTest(WriteTest): sep = '|' @@ -326,6 +382,11 @@ comp = "gz" class WriteStreamTestGzip(WriteStreamTest): comp = "gz" +class ReadAsteriskTestGzip(ReadAsteriskTest): + comp = "gz" +class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest): + comp = "gz" + # Filemode test cases @@ -345,6 +406,10 @@ comp = "bz2" class WriteStreamTestBzip2(WriteStreamTestGzip): comp = "bz2" + class ReadAsteriskTestBzip2(ReadAsteriskTest): + comp = "bz2" + class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest): + comp = "bz2" # If importing gzip failed, discard the Gzip TestCases. 
if not gzip: @@ -365,7 +430,10 @@ FileModeTest, ReadTest, ReadStreamTest, + ReadAsteriskTest, + ReadStreamAsteriskTest, WriteTest, + WriteSize0Test, WriteStreamTest, WriteGNULongTest, ] @@ -376,13 +444,15 @@ if gzip: tests.extend([ ReadTestGzip, ReadStreamTestGzip, - WriteTestGzip, WriteStreamTestGzip + WriteTestGzip, WriteStreamTestGzip, + ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip ]) if bz2: tests.extend([ ReadTestBzip2, ReadStreamTestBzip2, - WriteTestBzip2, WriteStreamTestBzip2 + WriteTestBzip2, WriteStreamTestBzip2, + ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2 ]) try: test_support.run_unittest(*tests) Index: test_tempfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_tempfile.py,v retrieving revision 1.1.10.2 retrieving revision 1.1.10.3 diff -u -d -r1.1.10.2 -r1.1.10.3 --- test_tempfile.py 7 Jan 2005 06:59:12 -0000 1.1.10.2 +++ test_tempfile.py 16 Oct 2005 05:24:01 -0000 1.1.10.3 @@ -307,7 +307,7 @@ retval = os.spawnl(os.P_WAIT, sys.executable, decorated, tester, v, fd) self.failIf(retval < 0, "child process caught fatal signal %d" % -retval) - self.failIf(retval > 0, "child process reports failure") + self.failIf(retval > 0, "child process reports failure %d"%retval) def test_textmode(self): # _mkstemp_inner can create files in text mode Index: test_textwrap.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_textwrap.py,v retrieving revision 1.18.10.2 retrieving revision 1.18.10.3 diff -u -d -r1.18.10.2 -r1.18.10.3 --- test_textwrap.py 7 Jan 2005 06:59:12 -0000 1.18.10.2 +++ test_textwrap.py 16 Oct 2005 05:24:01 -0000 1.18.10.3 @@ -165,6 +165,24 @@ ["this-is-a-useful-feature-for-reformatting-", "posts-from-tim-peters'ly"]) + def test_hyphenated_numbers(self): + # Test that hyphenated numbers (eg. dates) are not broken like words. + text = ("Python 1.0.0 was released on 1994-01-26. 
Python 1.0.1 was\n" + "released on 1994-02-15.") + + self.check_wrap(text, 30, ['Python 1.0.0 was released on', + '1994-01-26. Python 1.0.1 was', + 'released on 1994-02-15.']) + self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.', + 'Python 1.0.1 was released on 1994-02-15.']) + + text = "I do all my shopping at 7-11." + self.check_wrap(text, 25, ["I do all my shopping at", + "7-11."]) + self.check_wrap(text, 27, ["I do all my shopping at", + "7-11."]) + self.check_wrap(text, 29, ["I do all my shopping at 7-11."]) + def test_em_dash(self): # Test text with em-dashes text = "Em-dashes should be written -- thus." @@ -310,17 +328,18 @@ self.check_wrap(text, 30, [" This is a sentence with", "leading whitespace."]) - def test_unicode(self): - # *Very* simple test of wrapping Unicode strings. I'm sure - # there's more to it than this, but let's at least make - # sure textwrap doesn't crash on Unicode input! - text = u"Hello there, how are you today?" - self.check_wrap(text, 50, [u"Hello there, how are you today?"]) - self.check_wrap(text, 20, [u"Hello there, how are", "you today?"]) - olines = self.wrapper.wrap(text) - assert isinstance(olines, list) and isinstance(olines[0], unicode) - otext = self.wrapper.fill(text) - assert isinstance(otext, unicode) + if test_support.have_unicode: + def test_unicode(self): + # *Very* simple test of wrapping Unicode strings. I'm sure + # there's more to it than this, but let's at least make + # sure textwrap doesn't crash on Unicode input! + text = u"Hello there, how are you today?" 
+ self.check_wrap(text, 50, [u"Hello there, how are you today?"]) + self.check_wrap(text, 20, [u"Hello there, how are", "you today?"]) + olines = self.wrapper.wrap(text) + assert isinstance(olines, list) and isinstance(olines[0], unicode) + otext = self.wrapper.fill(text) + assert isinstance(otext, unicode) def test_split(self): # Ensure that the standard _split() method works as advertised Index: test_threading.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_threading.py,v retrieving revision 1.2.18.1 retrieving revision 1.2.18.2 diff -u -d -r1.2.18.1 -r1.2.18.2 --- test_threading.py 28 Apr 2003 17:27:47 -0000 1.2.18.1 +++ test_threading.py 16 Oct 2005 05:24:01 -0000 1.2.18.2 @@ -1,55 +1,112 @@ # Very rudimentary test of threading module -# Create a bunch of threads, let each do some work, wait until all are done - +import test.test_support from test.test_support import verbose import random import threading +import thread import time +import unittest -# This takes about n/3 seconds to run (about n/3 clumps of tasks, times -# about 1 second per clump). -numtasks = 10 - -# no more than 3 of the 10 can run at once -sema = threading.BoundedSemaphore(value=3) -mutex = threading.RLock() -running = 0 +# A trivial mutable counter. 
+class Counter(object): + def __init__(self): + self.value = 0 + def inc(self): + self.value += 1 + def dec(self): + self.value -= 1 + def get(self): + return self.value class TestThread(threading.Thread): + def __init__(self, name, testcase, sema, mutex, nrunning): + threading.Thread.__init__(self, name=name) + self.testcase = testcase + self.sema = sema + self.mutex = mutex + self.nrunning = nrunning + def run(self): - global running delay = random.random() * 2 if verbose: print 'task', self.getName(), 'will run for', delay, 'sec' - sema.acquire() - mutex.acquire() - running = running + 1 + + self.sema.acquire() + + self.mutex.acquire() + self.nrunning.inc() if verbose: - print running, 'tasks are running' - mutex.release() + print self.nrunning.get(), 'tasks are running' + self.testcase.assert_(self.nrunning.get() <= 3) + self.mutex.release() + time.sleep(delay) if verbose: print 'task', self.getName(), 'done' - mutex.acquire() - running = running - 1 + + self.mutex.acquire() + self.nrunning.dec() + self.testcase.assert_(self.nrunning.get() >= 0) if verbose: - print self.getName(), 'is finished.', running, 'tasks are running' - mutex.release() - sema.release() + print self.getName(), 'is finished.', self.nrunning.get(), \ + 'tasks are running' + self.mutex.release() -threads = [] -def starttasks(): - for i in range(numtasks): - t = TestThread(name=""%i) - threads.append(t) - t.start() + self.sema.release() -starttasks() +class ThreadTests(unittest.TestCase): -if verbose: - print 'waiting for all tasks to complete' -for t in threads: - t.join() -if verbose: - print 'all tasks done' + # Create a bunch of threads, let each do some work, wait until all are + # done. + def test_various_ops(self): + # This takes about n/3 seconds to run (about n/3 clumps of tasks, + # times about 1 second per clump). 
+ NUMTASKS = 10 + + # no more than 3 of the 10 can run at once + sema = threading.BoundedSemaphore(value=3) + mutex = threading.RLock() + numrunning = Counter() + + threads = [] + + for i in range(NUMTASKS): + t = TestThread(""%i, self, sema, mutex, numrunning) + threads.append(t) + t.start() + + if verbose: + print 'waiting for all tasks to complete' + for t in threads: + t.join(NUMTASKS) + self.assert_(not t.isAlive()) + if verbose: + print 'all tasks done' + self.assertEqual(numrunning.get(), 0) + + def test_foreign_thread(self): + # Check that a "foreign" thread can use the threading module. + def f(mutex): + # Acquiring an RLock forces an entry for the foreign + # thread to get made in the threading._active map. + r = threading.RLock() + r.acquire() + r.release() + mutex.release() + + mutex = threading.Lock() + mutex.acquire() + tid = thread.start_new_thread(f, (mutex,)) + # Wait for the thread to finish. + mutex.acquire() + self.assert_(tid in threading._active) + self.assert_(isinstance(threading._active[tid], + threading._DummyThread)) + del threading._active[tid] + +def test_main(): + test.test_support.run_unittest(ThreadTests) + +if __name__ == "__main__": + test_main() Index: test_tokenize.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_tokenize.py,v retrieving revision 1.6.10.2 retrieving revision 1.6.10.3 diff -u -d -r1.6.10.2 -r1.6.10.3 --- test_tokenize.py 7 Jan 2005 06:59:13 -0000 1.6.10.2 +++ test_tokenize.py 16 Oct 2005 05:24:01 -0000 1.6.10.3 @@ -1,12 +1,100 @@ -from test.test_support import verbose, findfile -import tokenize, os, sys +from test.test_support import verbose, findfile, is_resource_enabled, TestFailed +import os, glob, random +from tokenize import (tokenize, generate_tokens, untokenize, + NUMBER, NAME, OP, STRING) if verbose: print 'starting...' 
f = file(findfile('tokenize_tests' + os.extsep + 'txt')) -tokenize.tokenize(f.readline) +tokenize(f.readline) f.close() + + +###### Test roundtrip for untokenize ########################## + +def test_roundtrip(f): + ## print 'Testing:', f + f = file(f) + try: + fulltok = list(generate_tokens(f.readline)) + finally: + f.close() + + t1 = [tok[:2] for tok in fulltok] + newtext = untokenize(t1) + readline = iter(newtext.splitlines(1)).next + t2 = [tok[:2] for tok in generate_tokens(readline)] + assert t1 == t2 + + +f = findfile('tokenize_tests' + os.extsep + 'txt') +test_roundtrip(f) + +testdir = os.path.dirname(f) or os.curdir +testfiles = glob.glob(testdir + os.sep + 'test*.py') +if not is_resource_enabled('compiler'): + testfiles = random.sample(testfiles, 10) + +for f in testfiles: + test_roundtrip(f) + + +###### Test detection of IndentationError ##################### + +from cStringIO import StringIO + +sampleBadText = """ +def foo(): + bar + baz +""" + +try: + for tok in generate_tokens(StringIO(sampleBadText).readline): + pass +except IndentationError: + pass +else: + raise TestFailed("Did not detect IndentationError:") + + +###### Test example in the docs ############################### + +from decimal import Decimal +from cStringIO import StringIO + +def decistmt(s): + """Substitute Decimals for floats in a string of statements. + + >>> from decimal import Decimal + >>> s = 'print +21.3e-5*-.1234/81.7' + >>> decistmt(s) + "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" + + >>> exec(s) + -3.21716034272e-007 + >>> exec(decistmt(s)) + -3.217160342717258261933904529E-7 + + """ + result = [] + g = generate_tokens(StringIO(s).readline) # tokenize the string + for toknum, tokval, _, _, _ in g: + if toknum == NUMBER and '.' 
in tokval: # replace NUMBER tokens + result.extend([ + (NAME, 'Decimal'), + (OP, '('), + (STRING, repr(tokval)), + (OP, ')') + ]) + else: + result.append((toknum, tokval)) + return untokenize(result) + +import doctest +doctest.testmod() + if verbose: print 'finished' Index: test_trace.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_trace.py,v retrieving revision 1.7.10.3 retrieving revision 1.7.10.4 diff -u -d -r1.7.10.3 -r1.7.10.4 --- test_trace.py 13 Oct 2005 16:58:39 -0000 1.7.10.3 +++ test_trace.py 16 Oct 2005 05:24:01 -0000 1.7.10.4 @@ -97,6 +97,7 @@ (-3, 'call'), (-2, 'line'), (-2, 'exception'), + (-2, 'return'), (2, 'exception'), (3, 'line'), (4, 'line'), Index: test_unicode.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_unicode.py,v retrieving revision 1.58.2.2 retrieving revision 1.58.2.3 diff -u -d -r1.58.2.2 -r1.58.2.3 --- test_unicode.py 7 Jan 2005 06:59:13 -0000 1.58.2.2 +++ test_unicode.py 16 Oct 2005 05:24:01 -0000 1.58.2.3 @@ -388,7 +388,10 @@ self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10 abc') self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103 abc') self.assertEqual('%c' % u'a', u'a') - + class Wrapper: + def __str__(self): + return u'\u1234' + self.assertEqual('%s' % Wrapper(), u'\u1234') def test_constructor(self): # unicode(obj) tests (this maps to PyObject_Unicode() at C level) @@ -725,6 +728,69 @@ y = x.encode("raw-unicode-escape").decode("raw-unicode-escape") self.assertEqual(x, y) + def test_conversion(self): + # Make sure __unicode__() works properly + class Foo0: + def __str__(self): + return "foo" + + class Foo1: + def __unicode__(self): + return u"foo" + + class Foo2(object): + def __unicode__(self): + return u"foo" + + class Foo3(object): + def __unicode__(self): + return "foo" + + class Foo4(str): + def __unicode__(self): + return "foo" + + class Foo5(unicode): 
+ def __unicode__(self): + return "foo" + + class Foo6(str): + def __str__(self): + return "foos" + + def __unicode__(self): + return u"foou" + + class Foo7(unicode): + def __str__(self): + return "foos" + def __unicode__(self): + return u"foou" + + class Foo8(unicode): + def __new__(cls, content=""): + return unicode.__new__(cls, 2*content) + def __unicode__(self): + return self + + class Foo9(unicode): + def __str__(self): + return "string" + def __unicode__(self): + return "not unicode" + + self.assertEqual(unicode(Foo0()), u"foo") + self.assertEqual(unicode(Foo1()), u"foo") + self.assertEqual(unicode(Foo2()), u"foo") + self.assertEqual(unicode(Foo3()), u"foo") + self.assertEqual(unicode(Foo4("bar")), u"foo") + self.assertEqual(unicode(Foo5("bar")), u"foo") + self.assertEqual(unicode(Foo6("bar")), u"foou") + self.assertEqual(unicode(Foo7("bar")), u"foou") + self.assertEqual(unicode(Foo8("foo")), u"foofoo") + self.assertEqual(str(Foo9("foo")), "string") + self.assertEqual(unicode(Foo9("foo")), u"not unicode") + def test_main(): test_support.run_unittest(UnicodeTest) Index: test_unicode_file.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_unicode_file.py,v retrieving revision 1.4.2.2 retrieving revision 1.4.2.3 diff -u -d -r1.4.2.2 -r1.4.2.3 --- test_unicode_file.py 7 Jan 2005 06:59:13 -0000 1.4.2.2 +++ test_unicode_file.py 16 Oct 2005 05:24:01 -0000 1.4.2.3 @@ -44,8 +44,10 @@ def _do_single(self, filename): self.failUnless(os.path.exists(filename)) self.failUnless(os.path.isfile(filename)) + self.failUnless(os.access(filename, os.R_OK)) self.failUnless(os.path.exists(os.path.abspath(filename))) self.failUnless(os.path.isfile(os.path.abspath(filename))) + self.failUnless(os.access(os.path.abspath(filename), os.R_OK)) os.chmod(filename, 0777) os.utime(filename, None) os.utime(filename, (time.time(), time.time())) Index: test_unicodedata.py 
=================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_unicodedata.py,v retrieving revision 1.5.26.2 retrieving revision 1.5.26.3 diff -u -d -r1.5.26.2 -r1.5.26.3 --- test_unicodedata.py 7 Jan 2005 06:59:13 -0000 1.5.26.2 +++ test_unicodedata.py 16 Oct 2005 05:24:01 -0000 1.5.26.3 @@ -191,7 +191,7 @@ def test_decimal_numeric_consistent(self): # Test that decimal and numeric are consistent, # i.e. if a character has a decimal value, - # it's numeric value should be the same. + # its numeric value should be the same. count = 0 for i in xrange(0x10000): c = unichr(i) @@ -204,7 +204,7 @@ def test_digit_numeric_consistent(self): # Test that digit and numeric are consistent, # i.e. if a character has a digit value, - # it's numeric value should be the same. + # its numeric value should be the same. count = 0 for i in xrange(0x10000): c = unichr(i) Index: test_urllib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_urllib.py,v retrieving revision 1.8.26.2 retrieving revision 1.8.26.3 diff -u -d -r1.8.26.2 -r1.8.26.3 --- test_urllib.py 7 Jan 2005 06:59:13 -0000 1.8.26.2 +++ test_urllib.py 16 Oct 2005 05:24:01 -0000 1.8.26.3 @@ -6,6 +6,7 @@ from test import test_support import os import mimetools +import tempfile import StringIO def hexescape(char): @@ -125,15 +126,53 @@ """Test urllib.urlretrieve() on local files""" def setUp(self): + # Create a list of temporary files. Each item in the list is a file + # name (absolute path or relative to the current working directory). + # All files in this list will be deleted in the tearDown method. Note, + # this only helps to make sure temporary files get deleted, but it + # does nothing about trying to close files that may still be open. It + # is the responsibility of the developer to properly close files even + # when exceptional conditions occur. 
+ self.tempFiles = [] + # Create a temporary file. + self.registerFileForCleanUp(test_support.TESTFN) self.text = 'testing urllib.urlretrieve' - FILE = file(test_support.TESTFN, 'wb') - FILE.write(self.text) - FILE.close() + try: + FILE = file(test_support.TESTFN, 'wb') + FILE.write(self.text) + FILE.close() + finally: + try: FILE.close() + except: pass def tearDown(self): - # Delete the temporary file. - os.remove(test_support.TESTFN) + # Delete the temporary files. + for each in self.tempFiles: + try: os.remove(each) + except: pass + + def constructLocalFileUrl(self, filePath): + return "file://%s" % urllib.pathname2url(os.path.abspath(filePath)) + + def createNewTempFile(self, data=""): + """Creates a new temporary file containing the specified data, + registers the file for deletion during the test fixture tear down, and + returns the absolute path of the file.""" + + newFd, newFilePath = tempfile.mkstemp() + try: + self.registerFileForCleanUp(newFilePath) + newFile = os.fdopen(newFd, "wb") + newFile.write(data) + newFile.close() + finally: + try: newFile.close() + except: pass + return newFilePath + + def registerFileForCleanUp(self, fileName): + self.tempFiles.append(fileName) def test_basic(self): # Make sure that a local file just gets its own location returned and @@ -147,15 +186,19 @@ def test_copy(self): # Test that setting the filename argument works. 
second_temp = "%s.2" % test_support.TESTFN - result = urllib.urlretrieve("file:%s" % test_support.TESTFN, second_temp) + self.registerFileForCleanUp(second_temp) + result = urllib.urlretrieve(self.constructLocalFileUrl( + test_support.TESTFN), second_temp) self.assertEqual(second_temp, result[0]) self.assert_(os.path.exists(second_temp), "copy of the file was not " "made") FILE = file(second_temp, 'rb') try: text = FILE.read() - finally: FILE.close() + finally: + try: FILE.close() + except: pass self.assertEqual(self.text, text) def test_reporthook(self): @@ -167,8 +210,49 @@ self.assertEqual(count, count_holder[0]) count_holder[0] = count_holder[0] + 1 second_temp = "%s.2" % test_support.TESTFN - urllib.urlretrieve(test_support.TESTFN, second_temp, hooktester) - os.remove(second_temp) + self.registerFileForCleanUp(second_temp) + urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN), + second_temp, hooktester) + + def test_reporthook_0_bytes(self): + # Test on zero length file. Should call reporthook only 1 time. + report = [] + def hooktester(count, block_size, total_size, _report=report): + _report.append((count, block_size, total_size)) + srcFileName = self.createNewTempFile() + urllib.urlretrieve(self.constructLocalFileUrl(srcFileName), + test_support.TESTFN, hooktester) + self.assertEqual(len(report), 1) + self.assertEqual(report[0][2], 0) + + def test_reporthook_5_bytes(self): + # Test on 5 byte file. Should call reporthook only 2 times (once when + # the "network connection" is established and once when the block is + # read). Since the block size is 8192 bytes, only one block read is + # required to read the entire file. 
+ report = [] + def hooktester(count, block_size, total_size, _report=report): + _report.append((count, block_size, total_size)) + srcFileName = self.createNewTempFile("x" * 5) + urllib.urlretrieve(self.constructLocalFileUrl(srcFileName), + test_support.TESTFN, hooktester) + self.assertEqual(len(report), 2) + self.assertEqual(report[0][1], 8192) + self.assertEqual(report[0][2], 5) + + def test_reporthook_8193_bytes(self): + # Test on 8193 byte file. Should call reporthook only 3 times (once + # when the "network connection" is established, once for the next 8192 + # bytes, and once for the last byte). + report = [] + def hooktester(count, block_size, total_size, _report=report): + _report.append((count, block_size, total_size)) + srcFileName = self.createNewTempFile("x" * 8193) + urllib.urlretrieve(self.constructLocalFileUrl(srcFileName), + test_support.TESTFN, hooktester) + self.assertEqual(len(report), 3) + self.assertEqual(report[0][1], 8192) + self.assertEqual(report[0][2], 8193) class QuotingTests(unittest.TestCase): """Tests for urllib.quote() and urllib.quote_plus() @@ -269,6 +353,12 @@ self.assertEqual(expect, result, "using quote_plus(): %s != %s" % (expect, result)) + def test_quoting_plus(self): + self.assertEqual(urllib.quote_plus('alpha+beta gamma'), + 'alpha%2Bbeta+gamma') + self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'), + 'alpha+beta+gamma') + class UnquotingTests(unittest.TestCase): """Tests for unquote() and unquote_plus() Index: test_urllib2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_urllib2.py,v retrieving revision 1.6.10.2 retrieving revision 1.6.10.3 diff -u -d -r1.6.10.2 -r1.6.10.3 --- test_urllib2.py 7 Jan 2005 06:59:13 -0000 1.6.10.2 +++ test_urllib2.py 16 Oct 2005 05:24:01 -0000 1.6.10.3 @@ -41,6 +41,18 @@ buf = f.read() f.close() + def test_statudict(self): + # test the new-in-2.5 httpresponses dictionary + 
self.assertEquals(urllib2.httpresponses[404], "Not Found") + + def test_parse_http_list(self): + tests = [('a,b,c', ['a', 'b', 'c']), + ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']), + ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']), + ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])] + for string, list in tests: + self.assertEquals(urllib2.parse_http_list(string), list) + class MockOpener: addheaders = [] Index: test_urlparse.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_urlparse.py,v retrieving revision 1.5.2.2 retrieving revision 1.5.2.3 diff -u -d -r1.5.2.2 -r1.5.2.3 --- test_urlparse.py 7 Jan 2005 06:59:13 -0000 1.5.2.2 +++ test_urlparse.py 16 Oct 2005 05:24:01 -0000 1.5.2.3 @@ -8,20 +8,22 @@ RFC2396_BASE = "http://a/b/c/d;p?q" class UrlParseTestCase(unittest.TestCase): - def test_frags(self): - for url, parsed, split in [ - ('http://www.python.org', - ('http', 'www.python.org', '', '', '', ''), - ('http', 'www.python.org', '', '', '')), - ('http://www.python.org#abc', - ('http', 'www.python.org', '', '', '', 'abc'), - ('http', 'www.python.org', '', '', 'abc')), - ('http://www.python.org/#abc', - ('http', 'www.python.org', '/', '', '', 'abc'), - ('http', 'www.python.org', '/', '', 'abc')), - (RFC1808_BASE, - ('http', 'a', '/b/c/d', 'p', 'q', 'f'), - ('http', 'a', '/b/c/d;p', 'q', 'f')), + + def checkRoundtrips(self, url, parsed, split): + result = urlparse.urlparse(url) + self.assertEqual(result, parsed) + # put it back together and it should be the same + result2 = urlparse.urlunparse(result) + self.assertEqual(result2, url) + + # check the roundtrip using urlsplit() as well + result = urlparse.urlsplit(url) + self.assertEqual(result, split) + result2 = urlparse.urlunsplit(result) + self.assertEqual(result2, url) + + def test_roundtrips(self): + testcases = [ ('file:///tmp/junk.txt', ('file', '', 
'/tmp/junk.txt', '', '', ''), ('file', '', '/tmp/junk.txt', '', '')), @@ -29,20 +31,46 @@ ('imap', 'mail.python.org', '/mbox1', '', '', ''), ('imap', 'mail.python.org', '/mbox1', '', '')), ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf', - ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', '', '', ''), - ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', '', '')), - ]: - result = urlparse.urlparse(url) - self.assertEqual(result, parsed) - # put it back together and it should be the same - result2 = urlparse.urlunparse(result) - self.assertEqual(result2, url) + ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', + '', '', ''), + ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', + '', '')), + ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/', + ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', + '', '', ''), + ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', + '', '')) + ] + for url, parsed, split in testcases: + self.checkRoundtrips(url, parsed, split) - # check the roundtrip using urlsplit() as well - result = urlparse.urlsplit(url) - self.assertEqual(result, split) - result2 = urlparse.urlunsplit(result) - self.assertEqual(result2, url) + def test_http_roundtrips(self): + # urlparse.urlsplit treats 'http:' as an optimized special case, + # so we test both 'http:' and 'https:' in all the following. + # Three cheers for white box knowledge! 
+ testcases = [ + ('://www.python.org', + ('www.python.org', '', '', '', ''), + ('www.python.org', '', '', '')), + ('://www.python.org#abc', + ('www.python.org', '', '', '', 'abc'), + ('www.python.org', '', '', 'abc')), + ('://www.python.org?q=abc', + ('www.python.org', '', '', 'q=abc', ''), + ('www.python.org', '', 'q=abc', '')), + ('://www.python.org/#abc', + ('www.python.org', '/', '', '', 'abc'), + ('www.python.org', '/', '', 'abc')), + ('://a/b/c/d;p?q#f', + ('a', '/b/c/d', 'p', 'q', 'f'), + ('a', '/b/c/d;p', 'q', 'f')), + ] + for scheme in ('http', 'https'): + for url, parsed, split in testcases: + url = scheme + url + parsed = (scheme,) + parsed + split = (scheme,) + split + self.checkRoundtrips(url, parsed, split) def checkJoin(self, base, relurl, expected): self.assertEqual(urlparse.urljoin(base, relurl), expected, Index: test_userdict.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_userdict.py,v retrieving revision 1.5.10.2 retrieving revision 1.5.10.3 diff -u -d -r1.5.10.2 -r1.5.10.3 --- test_userdict.py 7 Jan 2005 06:59:13 -0000 1.5.10.2 +++ test_userdict.py 16 Oct 2005 05:24:01 -0000 1.5.10.3 @@ -191,12 +191,12 @@ for key, value in self.iteritems(): d[key] = value return d + @classmethod def fromkeys(cls, keys, value=None): d = cls() for key in keys: d[key] = value return d - fromkeys = classmethod(fromkeys) class UserDictMixinTest(mapping_tests.TestMappingProtocol): type2test = SeqDict Index: test_userstring.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_userstring.py,v retrieving revision 1.6.2.2 retrieving revision 1.6.2.3 diff -u -d -r1.6.2.2 -r1.6.2.3 --- test_userstring.py 7 Jan 2005 06:59:13 -0000 1.6.2.2 +++ test_userstring.py 16 Oct 2005 05:24:01 -0000 1.6.2.3 @@ -5,7 +5,7 @@ import unittest from test import test_support, string_tests -from UserString import UserString +from UserString 
import UserString, MutableString class UserStringTest( string_tests.CommonTest, @@ -43,8 +43,77 @@ # we don't fix the arguments, because UserString can't cope with it getattr(object, methodname)(*args) +class MutableStringTest(UserStringTest): + type2test = MutableString + + # MutableStrings can be hashed => deactivate test + def test_hash(self): + pass + + def test_setitem(self): + s = self.type2test("foo") + self.assertRaises(IndexError, s.__setitem__, -4, "bar") + self.assertRaises(IndexError, s.__setitem__, 3, "bar") + s[-1] = "bar" + self.assertEqual(s, "fobar") + s[0] = "bar" + self.assertEqual(s, "barobar") + + def test_delitem(self): + s = self.type2test("foo") + self.assertRaises(IndexError, s.__delitem__, -4) + self.assertRaises(IndexError, s.__delitem__, 3) + del s[-1] + self.assertEqual(s, "fo") + del s[0] + self.assertEqual(s, "o") + del s[0] + self.assertEqual(s, "") + + def test_setslice(self): + s = self.type2test("foo") + s[:] = "bar" + self.assertEqual(s, "bar") + s[1:2] = "foo" + self.assertEqual(s, "bfoor") + s[1:-1] = UserString("a") + self.assertEqual(s, "bar") + s[0:10] = 42 + self.assertEqual(s, "42") + + def test_delslice(self): + s = self.type2test("foobar") + del s[3:10] + self.assertEqual(s, "foo") + del s[-1:10] + self.assertEqual(s, "fo") + + def test_immutable(self): + s = self.type2test("foobar") + s2 = s.immutable() + self.assertEqual(s, s2) + self.assert_(isinstance(s2, UserString)) + + def test_iadd(self): + s = self.type2test("foo") + s += "bar" + self.assertEqual(s, "foobar") + s += UserString("baz") + self.assertEqual(s, "foobarbaz") + s += 42 + self.assertEqual(s, "foobarbaz42") + + def test_imul(self): + s = self.type2test("foo") + s *= 1 + self.assertEqual(s, "foo") + s *= 2 + self.assertEqual(s, "foofoo") + s *= -1 + self.assertEqual(s, "") + def test_main(): - test_support.run_unittest(UserStringTest) + test_support.run_unittest(UserStringTest, MutableStringTest) if __name__ == "__main__": test_main() Index: 
test_weakref.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_weakref.py,v retrieving revision 1.19.2.2 retrieving revision 1.19.2.3 diff -u -d -r1.19.2.2 -r1.19.2.3 --- test_weakref.py 7 Jan 2005 06:59:13 -0000 1.19.2.2 +++ test_weakref.py 16 Oct 2005 05:24:01 -0000 1.19.2.3 @@ -271,6 +271,12 @@ del f[0] self.assertEqual(f.result, 0) + def test_proxy_bool(self): + # Test clearing of SF bug #1170766 + class List(list): pass + lyst = List() + self.assertEqual(bool(weakref.proxy(lyst)), bool(lyst)) + def test_getweakrefcount(self): o = C() ref1 = weakref.ref(o) @@ -995,6 +1001,91 @@ def _reference(self): return self.__ref.copy() +libreftest = """ Doctest for examples in the library reference: libweakref.tex + +>>> import weakref +>>> class Dict(dict): +... pass +... +>>> obj = Dict(red=1, green=2, blue=3) # this object is weak referencable +>>> r = weakref.ref(obj) +>>> print r() +{'blue': 3, 'green': 2, 'red': 1} + +>>> import weakref +>>> class Object: +... pass +... +>>> o = Object() +>>> r = weakref.ref(o) +>>> o2 = r() +>>> o is o2 +True +>>> del o, o2 +>>> print r() +None + +>>> import weakref +>>> class ExtendedRef(weakref.ref): +... def __init__(self, ob, callback=None, **annotations): +... super(ExtendedRef, self).__init__(ob, callback) +... self.__counter = 0 +... for k, v in annotations.iteritems(): +... setattr(self, k, v) +... def __call__(self): +... '''Return a pair containing the referent and the number of +... times the reference has been called. +... ''' +... ob = super(ExtendedRef, self).__call__() +... if ob is not None: +... self.__counter += 1 +... ob = (ob, self.__counter) +... return ob +... +>>> class A: # not in docs from here, just testing the ExtendedRef +... pass +... 
+>>> a = A() +>>> r = ExtendedRef(a, foo=1, bar="baz") +>>> r.foo +1 +>>> r.bar +'baz' +>>> r()[1] +1 +>>> r()[1] +2 +>>> r()[0] is a +True + + +>>> import weakref +>>> _id2obj_dict = weakref.WeakValueDictionary() +>>> def remember(obj): +... oid = id(obj) +... _id2obj_dict[oid] = obj +... return oid +... +>>> def id2obj(oid): +... return _id2obj_dict[oid] +... +>>> a = A() # from here, just testing +>>> a_id = remember(a) +>>> id2obj(a_id) is a +True +>>> del a +>>> try: +... id2obj(a_id) +... except KeyError: +... print 'OK' +... else: +... print 'WeakValueDictionary error' +OK + +""" + +__test__ = {'libreftest' : libreftest} + def test_main(): test_support.run_unittest( ReferencesTestCase, @@ -1002,6 +1093,7 @@ WeakValueDictionaryTestCase, WeakKeyDictionaryTestCase, ) + test_support.run_doctest(sys.modules[__name__]) if __name__ == "__main__": Index: test_xmlrpc.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_xmlrpc.py,v retrieving revision 1.2.10.2 retrieving revision 1.2.10.3 diff -u -d -r1.2.10.2 -r1.2.10.3 --- test_xmlrpc.py 7 Jan 2005 06:59:13 -0000 1.2.10.2 +++ test_xmlrpc.py 16 Oct 2005 05:24:01 -0000 1.2.10.3 @@ -1,8 +1,16 @@ +import datetime import sys import unittest import xmlrpclib from test import test_support +try: + unicode +except NameError: + have_unicode = False +else: + have_unicode = True + alist = [{'astring': 'foo at bar.baz.spam', 'afloat': 7283.43, 'anint': 2**20, @@ -12,6 +20,11 @@ 'boolean': xmlrpclib.False, 'unicode': u'\u4000\u6000\u8000', u'ukey\u4000': 'regular value', + 'datetime1': xmlrpclib.DateTime('20050210T11:41:23'), + 'datetime2': xmlrpclib.DateTime( + (2005, 02, 10, 11, 41, 23, 0, 1, -1)), + 'datetime3': xmlrpclib.DateTime( + datetime.datetime(2005, 02, 10, 11, 41, 23)), }] class XMLRPCTestCase(unittest.TestCase): @@ -20,6 +33,49 @@ self.assertEquals(alist, xmlrpclib.loads(xmlrpclib.dumps((alist,)))[0][0]) + def test_dump_bare_datetime(self): + # 
This checks that an unwrapped datetime.date object can be handled + # by the marshalling code. This can't be done via test_dump_load() + # since with use_datetime set to 1 the unmarshaller would create + # datetime objects for the 'datetime[123]' keys as well + dt = datetime.datetime(2005, 02, 10, 11, 41, 23) + s = xmlrpclib.dumps((dt,)) + (newdt,), m = xmlrpclib.loads(s, use_datetime=1) + self.assertEquals(newdt, dt) + self.assertEquals(m, None) + + (newdt,), m = xmlrpclib.loads(s, use_datetime=0) + self.assertEquals(newdt, xmlrpclib.DateTime('20050210T11:41:23')) + + def test_dump_bare_date(self): + # This checks that an unwrapped datetime.date object can be handled + # by the marshalling code. This can't be done via test_dump_load() + # since the unmarshaller produces a datetime object + d = datetime.datetime(2005, 02, 10, 11, 41, 23).date() + s = xmlrpclib.dumps((d,)) + (newd,), m = xmlrpclib.loads(s, use_datetime=1) + self.assertEquals(newd.date(), d) + self.assertEquals(newd.time(), datetime.time(0, 0, 0)) + self.assertEquals(m, None) + + (newdt,), m = xmlrpclib.loads(s, use_datetime=0) + self.assertEquals(newdt, xmlrpclib.DateTime('20050210T00:00:00')) + + def test_dump_bare_time(self): + # This checks that an unwrapped datetime.time object can be handled + # by the marshalling code. 
This can't be done via test_dump_load() + # since the unmarshaller produces a datetime object + t = datetime.datetime(2005, 02, 10, 11, 41, 23).time() + s = xmlrpclib.dumps((t,)) + (newt,), m = xmlrpclib.loads(s, use_datetime=1) + today = datetime.datetime.now().date().strftime("%Y%m%d") + self.assertEquals(newt.time(), t) + self.assertEquals(newt.date(), datetime.datetime.now().date()) + self.assertEquals(m, None) + + (newdt,), m = xmlrpclib.loads(s, use_datetime=0) + self.assertEquals(newdt, xmlrpclib.DateTime('%sT11:41:23'%today)) + def test_dump_big_long(self): self.assertRaises(OverflowError, xmlrpclib.dumps, (2L**99,)) @@ -39,6 +95,48 @@ xmlrpclib.loads(strg)[0][0]) self.assertRaises(TypeError, xmlrpclib.dumps, (arg1,)) + def test_default_encoding_issues(self): + # SF bug #1115989: wrong decoding in '_stringify' + utf8 = """ + + + abc \x95 + + + + + def \x96 + ghi \x97 + + + + + """ + + # sys.setdefaultencoding() normally doesn't exist after site.py is + # loaded. reload(sys) is the way to get it back. + old_encoding = sys.getdefaultencoding() + setdefaultencoding_existed = hasattr(sys, "setdefaultencoding") + reload(sys) # ugh! 
+ sys.setdefaultencoding("iso-8859-1") + try: + (s, d), m = xmlrpclib.loads(utf8) + finally: + sys.setdefaultencoding(old_encoding) + if not setdefaultencoding_existed: + del sys.setdefaultencoding + + items = d.items() + if have_unicode: + self.assertEquals(s, u"abc \x95") + self.assert_(isinstance(s, unicode)) + self.assertEquals(items, [(u"def \x96", u"ghi \x97")]) + self.assert_(isinstance(items[0][0], unicode)) + self.assert_(isinstance(items[0][1], unicode)) + else: + self.assertEquals(s, "abc \xc2\x95") + self.assertEquals(items, [("def \xc2\x96", "ghi \xc2\x97")]) + def test_main(): test_support.run_unittest(XMLRPCTestCase) Index: test_zlib.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_zlib.py,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -d -r1.17.2.2 -r1.17.2.3 --- test_zlib.py 7 Jan 2005 06:59:13 -0000 1.17.2.2 +++ test_zlib.py 16 Oct 2005 05:24:01 -0000 1.17.2.3 @@ -299,7 +299,7 @@ self.failUnless(co.flush()) # Returns a zlib header dco = zlib.decompressobj() self.assertEqual(dco.flush(), "") # Returns nothing - + def genblock(seed, length, step=1024, generator=random): """length-byte stream of random data from a seed (in step-byte blocks).""" From ncoghlan at users.sourceforge.net Sun Oct 16 09:30:22 2005 From: ncoghlan at users.sourceforge.net (ncoghlan@users.sourceforge.net) Date: Sun, 16 Oct 2005 09:30:22 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep-0343.txt,1.29,1.30 Message-ID: <20051016073022.B53841E4054@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12102 Modified Files: pep-0343.txt Log Message: Update PEP 343 to reflect post-acceptance python-dev discussions Index: pep-0343.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0343.txt,v retrieving revision 1.29 retrieving revision 1.30 
diff -u -d -r1.29 -r1.30 --- pep-0343.txt 12 Jul 2005 16:28:56 -0000 1.29 +++ pep-0343.txt 16 Oct 2005 07:30:17 -0000 1.30 @@ -2,13 +2,29 @@ Title: Anonymous Block Redux and Generator Enhancements Version: $Revision$ Last-Modified: $Date$ -Author: Guido van Rossum +Author: Guido van Rossum, Nick Coghlan Status: Accepted Type: Standards Track Content-Type: text/plain Created: 13-May-2005 Post-History: 2-Jun-2005 +Abstract + + This PEP adds a new statement "with" to the Python language to make + it possible to factor out standard uses of try/finally statements. + + The PEP has been approved in principle by the BDFL, but there are + still a couple of implementation details to be worked out (see the + section on Open Issues). + +Author's Note + + This PEP was originally written in first person by Guido, and + subsequently updated by Nick Coghlan to reflect later discussion + on python-dev. Any first person references are from Guido's + original. + Introduction After a lot of discussion about PEP 340 and alternatives, I @@ -208,7 +224,7 @@ The translation of the above statement is: - abc = EXPR + abc = (EXPR).__with__() exc = (None, None, None) VAR = abc.__enter__() try: @@ -224,6 +240,15 @@ accessible to the user; they will most likely be implemented as special registers or stack positions. + The call to the __with__() method serves a similar purpose to that + of the __iter__() method of iterators and iterables. An object + with simple state requirements (such as threading.RLock) may provide + its own __enter__() and __exit__() methods, and simply return + 'self' from its __with__ method. On the other hand, an object with + more complex state requirements (such as decimal.Context) may + return a distinct context manager object each time its __with__ + method is invoked. + If the "as VAR" part of the syntax is omitted, the "VAR =" part of the translation is omitted (but abc.__enter__() is still called).
@@ -254,15 +279,18 @@ Generator Decorator - If PEP 342 is accepted, it will be possible to write a decorator + With PEP 342 accepted, it is possible to write a decorator that makes it possible to use a generator that yields exactly once to control a with-statement. Here's a sketch of such a decorator: - class ContextWrapper(object): + class GeneratorContext(object): def __init__(self, gen): self.gen = gen + def __with__(self): + return self + def __enter__(self): try: return self.gen.next() @@ -285,25 +313,31 @@ else: raise RuntimeError("generator caught exception") - def contextmanager(func): + def context(func): def helper(*args, **kwds): - return ContextWrapper(func(*args, **kwds)) + return GeneratorContext(func(*args, **kwds)) return helper This decorator could be used as follows: - @contextmanager + @context def opening(filename): - f = open(filename) # IOError is untouched by ContextWrapper + f = open(filename) # IOError is untouched by GeneratorContext try: yield f finally: f.close() # Ditto for errors here (however unlikely) A robust implementation of this decorator should be made part of - the standard library, but not necessarily as a built-in function. - (I'm not sure which exception it should raise for errors; - RuntimeError is used above as an example only.) + the standard library. Refer to Open Issues regarding its name and + location. + + Just as generator-iterator functions are very useful for writing + __iter__() methods for iterables, generator-context functions will + be very useful for writing __with__() methods for contexts. It is + proposed that the invocation of the "context" decorator be + considered implicit for generator functions used as __with__() + methods (again, refer to the Open Issues section). Optional Extensions @@ -332,17 +366,78 @@ is entered). OTOH such mistakes are easily diagnosed; for example, the - contextmanager decorator above raises RuntimeError when the second - with-statement calls f.__enter__() again. 
+ generator-context decorator above raises RuntimeError when a + second with-statement calls f.__enter__() again. A similar error + can be raised if __enter__ is invoked on a closed file object. -Resolved Open Issues +Standard Terminology - Discussion on python-dev revealed some open issues. I list them - here, with my preferred resolution and its motivation. The PEP - has been accepted without these being challenged, so the issues - are now resolved. + Discussions about iterators and iterables are aided by the standard + terminology used to discuss them. The protocol used by the for + statement is called the iterator protocol and an iterator is any + object that properly implements that protocol. The term "iterable" + then encompasses all objects with an __iter__() method that + returns an iterator (this means that all iterators are iterables, + but not all iterables are iterators). - 1. The __exit__() method of the contextmanager decorator class + This PEP proposes that the protocol used by the with statement be + known as the "context management protocol", and that objects that + implement that protocol be known as "context managers". The term + "context" then encompasses all objects with a __with__() method + that returns a context manager (this means that all context managers + are contexts, but not all contexts are context managers). + + The term "context" is based on the concept that the context object + defines a context of execution for the code that forms the body + of the with statement. + + In cases where the general term "context" would be ambiguous, it + can be made explicit by expanding it to "manageable context". + +Open Issues + + Discussion on python-dev revealed some open issues. These are listed + here and will be resolved either by consensus on python-dev or by + BDFL fiat. + + 1. The name of the decorator used to convert a generator-iterator + function into a generator-context function is still to be + finalised. 
+ The proposal in this PEP is that it be called simply "context" + with the following reasoning: + - A "generator function" is an undecorated function containing + the 'yield' keyword, and the objects produced by + such functions are "generator-iterators". The term + "generator" may refer to either a generator function or a + generator-iterator depending on the situation. + - A "generator context function" is a generator function to + which the "context" decorator is applied and the objects + produced by such functions are "generator-context-managers". + The term "generator context" may refer to either a generator + context function or a generator-context-manager depending on + the situation. + + 2. Should the decorator to convert a generator function into a + generator context function be a builtin, or located elsewhere in + the standard library? This PEP suggests that it should be a + builtin, as generator context functions are the recommended way + of writing new context managers. + + 3. Should a generator function used to implement a __with__ method + always be considered to be a generator context function, without + requiring the context decorator? This PEP suggests that it + should, as applying a decorator to a slot just looks strange, + and omitting the decorator would be a source of obscure bugs. + The __new__ slot provides some precedent for special casing of + certain slots when processing slot methods. + +Resolved Issues + + The following issues were resolved either by BDFL fiat, consensus on + python-dev, or a simple lack of objection to proposals in the + original version of this PEP. + + 1. The __exit__() method of the GeneratorContext class catches StopIteration and considers it equivalent to re-raising the exception passed to throw(). Is allowing StopIteration right here? @@ -362,19 +457,77 @@ finally-clause (the one implicit in the with-statement) which re-raises the original exception anyway. + 2. 
What exception should GeneratorContext raise when the underlying + generator-iterator misbehaves? The following quote is the reason + behind Guido's choice of RuntimeError for both this and for the + generator close() method in PEP 342 (from [8]): + + "I'd rather not introduce a new exception class just for this + purpose, since it's not an exception that I want people to catch: + I want it to turn into a traceback which is seen by the + programmer who then fixes the code. So now I believe they + should both raise RuntimeError. + There are some precedents for that: it's raised by the core + Python code in situations where endless recursion is detected, + and for uninitialized objects (and for a variety of + miscellaneous conditions)." + + 3. After this PEP was originally approved, a subsequent discussion + on python-dev [4] settled on the term "context manager" for + objects which provide __enter__ and __exit__ methods, and + "context management protocol" for the protocol itself. With the + addition of the __with__ method to the protocol, a natural + extension is to call all objects which provide a __with__ method + "contexts" (or "manageable contexts" in situations where the + general term "context" would be ambiguous). + This is now documented in the "Standard Terminology" section. + + 4. The originally approved version of this PEP did not include a + __with__ method - the method was only added to the PEP after + Jason Orendorff pointed out the difficulty of writing + appropriate __enter__ and __exit__ methods for decimal.Context + [5]. This approach allows a class to define a native context + manager using generator syntax. It also allows a class to use an + existing independent context manager as its native context + manager by applying the independent context manager to 'self' in + its __with__ method. It even allows a class written in C to use + a generator context manager written in Python. 
+ The __with__ method parallels the __iter__ method which forms + part of the iterator protocol. + + 5. The suggestion was made by Jason Orendorff that the __enter__ + and __exit__ methods could be removed from the context + management protocol, and the protocol instead defined directly + in terms of the enhanced generator interface described in PEP + 342 [6]. + Guido rejected this idea [7]. The following are some of the benefits + of keeping the __enter__ and __exit__ methods: + - it makes it easy to implement a simple context manager in C + without having to rely on a separate coroutine builder + - it makes it easy to provide a low-overhead implementation + for context managers which don't need to maintain any + special state between the __enter__ and __exit__ methods + (having to use a generator for these would impose + unnecessary overhead without any compensating benefit) + - it makes it possible to understand how the with statement + works without having to first understand the mechanics of + how generator context managers are implemented. + Examples - (Note: several of these examples contain "yield None". If PEP 342 - is accepted, these can be changed to just "yield".) + (The generator based examples assume PEP 342 is implemented. Also, + some of the examples are likely to be unnecessary in practice, as + the appropriate objects, such as threading.RLock, will be able to + be used directly in with statements) 1. A template for ensuring that a lock, acquired at the start of a block, is released when the block is left: - @contextmanager + @context def locking(lock): lock.acquire() try: - yield None + yield finally: lock.release() @@ -392,7 +545,7 @@ 2. A template for opening a file that ensures the file is closed when the block is left: - @contextmanager + @context def opening(filename, mode="r"): f = open(filename, mode) try: @@ -409,7 +562,7 @@ 3.
A template for committing or rolling back a database transaction: - @contextmanager + @context def transactional(db): db.begin() try: @@ -424,18 +577,20 @@ class locking: def __init__(self, lock): self.lock = lock + def __with__(self, lock): + return self def __enter__(self): self.lock.acquire() def __exit__(self, type, value, tb): self.lock.release() (This example is easily modified to implement the other - examples; it shows the relative advantage of using a generator - template.) + examples; it shows that it is easy to avoid the need for a + generator if no special state needs to be preserved.) 5. Redirect stdout temporarily: - @contextmanager + @context def redirecting_stdout(new_stdout): save_stdout = sys.stdout sys.stdout = new_stdout @@ -456,7 +611,7 @@ 6. A variant on opening() that also returns an error condition: - @contextmanager + @context def opening_w_error(filename, mode="r"): try: f = open(filename, mode) @@ -520,54 +675,67 @@ # so this must be outside the with-statement: return +s - 9. Here's a more general Decimal-context-switching template: + 9. Here's a proposed native context manager for decimal.Context: - @contextmanager - def decimal_context(newctx=None): - oldctx = decimal.getcontext() - if newctx is None: - newctx = oldctx.copy() - decimal.setcontext(newctx) - try: - yield newctx - finally: - decimal.setcontext(oldctx) + # This would be a new decimal.Context method + def __with__(self): + # We set the thread context to a copy of this context + # to ensure that changes within the block are kept + # local to the block. This also gives us thread safety + # and supports nested usage of a given context.
+ newctx = self.copy() + oldctx = decimal.getcontext() + decimal.setcontext(newctx) + try: + yield newctx + finally: + decimal.setcontext(oldctx) - Sample usage: + Sample usage: - def sin(x): - with decimal_context() as ctx: - ctx.prec += 2 - # Rest of algorithm the same as above - return +s + def sin(x): + with decimal.getcontext() as ctx: + ctx.prec += 2 + # Rest of sin calculation algorithm + # uses a precision 2 greater than normal + return +s # Convert result to normal precision - (Nick Coghlan has proposed to add __enter__() and __exit__() - methods to the decimal.Context class so that this example can - be simplified to "with decimal.getcontext() as ctx: ...".) + def sin(x): + with decimal.ExtendedContext: + # Rest of sin calculation algorithm + # uses the Extended Context from the + # General Decimal Arithmetic Specification + return +s # Convert result to normal context 10. A generic "object-closing" template: - @contextmanager + @context def closing(obj): try: yield obj finally: - obj.close() + try: + close = obj.close + except AttributeError: + pass + else: + close() This can be used to deterministically close anything with a - close method, be it file, generator, or something else: + close method, be it file, generator, or something else. 
It can + even be used when the object isn't guaranteed to require + closing (e.g., a function that accepts an arbitrary iterable): # emulate opening(): with closing(open("argument.txt")) as contradiction: for line in contradiction: print line - # deterministically finalize a generator: - with closing(some_gen()) as data: + # deterministically finalize an iterator: + with closing(iter(data_source)) as data: for datum in data: process(datum) - References [1] http://blogs.msdn.com/oldnewthing/archive/2005/01/06/347666.aspx @@ -576,6 +744,21 @@ [3] http://wiki.python.org/moin/WithStatement + [4] + http://mail.python.org/pipermail/python-dev/2005-July/054658.html + + [5] + http://mail.python.org/pipermail/python-dev/2005-October/056947.html + + [6] + http://mail.python.org/pipermail/python-dev/2005-October/056969.html + + [7] + http://mail.python.org/pipermail/python-dev/2005-October/057018.html + + [8] + http://mail.python.org/pipermail/python-dev/2005-June/054064.html + Copyright This document has been placed in the public domain. 
From ncoghlan at users.sourceforge.net Sun Oct 16 09:35:53 2005 From: ncoghlan at users.sourceforge.net (ncoghlan@users.sourceforge.net) Date: Sun, 16 Oct 2005 09:35:53 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep-0343.txt, 1.30, 1.31 pep-0000.txt, 1.347, 1.348 Message-ID: <20051016073553.9BD8E1E400A@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12722 Modified Files: pep-0343.txt pep-0000.txt Log Message: Change name of PEP 343 to something less interesting but more obviously descriptive Index: pep-0343.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0343.txt,v retrieving revision 1.30 retrieving revision 1.31 diff -u -d -r1.30 -r1.31 --- pep-0343.txt 16 Oct 2005 07:30:17 -0000 1.30 +++ pep-0343.txt 16 Oct 2005 07:35:50 -0000 1.31 @@ -1,5 +1,5 @@ PEP: 343 -Title: Anonymous Block Redux and Generator Enhancements +Title: The "with" Statement Version: $Revision$ Last-Modified: $Date$ Author: Guido van Rossum, Nick Coghlan Index: pep-0000.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0000.txt,v retrieving revision 1.347 retrieving revision 1.348 diff -u -d -r1.347 -r1.348 --- pep-0000.txt 30 Sep 2005 14:42:36 -0000 1.347 +++ pep-0000.txt 16 Oct 2005 07:35:50 -0000 1.348 @@ -66,7 +66,7 @@ SA 308 Conditional Expressions GvR, Hettinger SA 328 Imports: Multi-Line and Absolute/Relative Aahz SA 342 Coroutines via Enhanced Generators GvR, Eby - SA 343 Anonymous Block Redux and Generator Enhancements GvR + SA 343 The "with" Statement GvR, Coghlan Open PEPs (under consideration) @@ -103,7 +103,7 @@ S 341 Unifying try-except and try-finally Birkenfeld S 344 Exception Chaining and Embedded Tracebacks Yee S 345 Metadata for Python Software Packages 1.2 Jones - P 347 Migrating the Python CVS to Subversion von Löwis + P 347 Migrating the Python CVS
to Subversion von L�is S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott S 754 IEEE 754 Floating Point Special Values Warnes @@ -152,7 +152,7 @@ SF 285 Adding a bool type GvR SF 289 Generator Expressions Hettinger SF 292 Simpler String Substitutions Warsaw - SF 293 Codec Error Handling Callbacks Dörwald + SF 293 Codec Error Handling Callbacks D�wald SF 301 Package Index and Metadata for Distutils Jones SF 305 CSV File API Montanaro, et al SF 307 Extensions to the pickle protocol GvR, Peters @@ -339,7 +339,7 @@ I 290 Code Migration and Modernization Hettinger I 291 Backward Compatibility for Standard Library Norwitz SF 292 Simpler String Substitutions Warsaw - SF 293 Codec Error Handling Callbacks Dörwald + SF 293 Codec Error Handling Callbacks D�wald SR 294 Type Names in the types Module Tirosh SR 295 Interpretation of multiline string constants Koltsov SR 296 Adding a bytes Object Type Gilbert @@ -392,7 +392,7 @@ S 344 Exception Chaining and Embedded Tracebacks Yee S 345 Metadata for Python Software Packages 1.2 Jones SR 346 User Defined ("with") Statements Coghlan - P 347 Migrating the Python CVS to Subversion von Löwis + P 347 Migrating the Python CVS to Subversion von L�is SR 348 Exception Reorganization for Python 3.0 Cannon S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott @@ -435,7 +435,7 @@ Cole, Dave djc at object-craft.com.au Craig, Christopher python-pep at ccraig.org Creighton, Laura lac at strakt.com - Dörwald, Walter + D�wald, Walter Drake, Fred fdrake at acm.org Dubner, Michael P. dubnerm at mindless.com Dubois, Paul F. paul at pfdubois.com @@ -446,7 +446,7 @@ Evans, Clark C. cce at clarkevans.com Ewing, Greg greg at cosc.canterbury.ac.nz Faassen, Martijn faassen at infrae.com - Giacometti, Frédéric B. fred at arakne.com + Giacometti, Fr��ic B.
fred at arakne.com Gilbert, Scott xscottg at yahoo.com Goodger, David goodger at python.org Griffin, Grant g2 at iowegian.com From rhettinger at users.sourceforge.net Sun Oct 16 13:17:33 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Sun, 16 Oct 2005 13:17:33 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc developers.txt,1.17,1.18 Message-ID: <20051016111733.99A991E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13979 Modified Files: developers.txt Log Message: Add Nick Coghlan Index: developers.txt =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/developers.txt,v retrieving revision 1.17 retrieving revision 1.18 diff -u -d -r1.17 -r1.18 --- developers.txt 27 Jul 2005 17:59:03 -0000 1.17 +++ developers.txt 16 Oct 2005 11:17:30 -0000 1.18 @@ -17,6 +17,9 @@ Permissions History ------------------- +- Nick Coghlan requested CVS access so he could update his PEP directly. + Granted by GvR on 16 Oct 2005. + - Added two new developers for the Summer of Code project. 8 July 2005 by RDH. Andrew Kuchling will be mentoring Gregory K Johnson for a project to enchance mailbox. 
Brett Cannon requested access for Flovis @@ -79,5 +82,6 @@ Initials of Project Admins -------------------------- +GvR: Guido van Rossum RDH: Raymond Hettinger TGP: Tim Peters From ncoghlan at users.sourceforge.net Sun Oct 16 14:21:17 2005 From: ncoghlan at users.sourceforge.net (ncoghlan@users.sourceforge.net) Date: Sun, 16 Oct 2005 14:21:17 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep-0000.txt,1.348,1.349 Message-ID: <20051016122117.1BFCC1E4002@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22557 Modified Files: pep-0000.txt Log Message: Fix names broken by Kate editor in previous checkin Index: pep-0000.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0000.txt,v retrieving revision 1.348 retrieving revision 1.349 diff -u -d -r1.348 -r1.349 --- pep-0000.txt 16 Oct 2005 07:35:50 -0000 1.348 +++ pep-0000.txt 16 Oct 2005 12:21:08 -0000 1.349 @@ -103,7 +103,7 @@ S 341 Unifying try-except and try-finally Birkenfeld S 344 Exception Chaining and Embedded Tracebacks Yee S 345 Metadata for Python Software Packages 1.2 Jones - P 347 Migrating the Python CVS to Subversion von L�is + P 347 Migrating the Python CVS to Subversion von Löwis S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott S 754 IEEE 754 Floating Point Special Values Warnes @@ -152,7 +152,7 @@ SF 285 Adding a bool type GvR SF 289 Generator Expressions Hettinger SF 292 Simpler String Substitutions Warsaw - SF 293 Codec Error Handling Callbacks D�wald + SF 293 Codec Error Handling Callbacks Dörwald SF 301 Package Index and Metadata for Distutils Jones SF 305 CSV File API Montanaro, et al SF 307 Extensions to the pickle protocol GvR, Peters @@ -339,7 +339,7 @@ I 290 Code Migration and Modernization Hettinger I 291 Backward Compatibility for Standard Library Norwitz SF 292 Simpler String Substitutions Warsaw - SF 293 Codec Error
Handling Callbacks D�wald + SF 293 Codec Error Handling Callbacks Dörwald SR 294 Type Names in the types Module Tirosh SR 295 Interpretation of multiline string constants Koltsov SR 296 Adding a bytes Object Type Gilbert @@ -392,7 +392,7 @@ S 344 Exception Chaining and Embedded Tracebacks Yee S 345 Metadata for Python Software Packages 1.2 Jones SR 346 User Defined ("with") Statements Coghlan - P 347 Migrating the Python CVS to Subversion von L�is + P 347 Migrating the Python CVS to Subversion von Löwis SR 348 Exception Reorganization for Python 3.0 Cannon S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott @@ -435,7 +435,7 @@ Cole, Dave djc at object-craft.com.au Craig, Christopher python-pep at ccraig.org Creighton, Laura lac at strakt.com - D�wald, Walter + Dörwald, Walter Drake, Fred fdrake at acm.org Dubner, Michael P. dubnerm at mindless.com Dubois, Paul F. paul at pfdubois.com @@ -446,7 +446,7 @@ Evans, Clark C. cce at clarkevans.com Ewing, Greg greg at cosc.canterbury.ac.nz Faassen, Martijn faassen at infrae.com - Giacometti, Fr��ic B. fred at arakne.com + Giacometti, Frédéric B. fred at arakne.com Gilbert, Scott xscottg at yahoo.com Goodger, David goodger at python.org Griffin, Grant g2 at iowegian.com From pje at users.sourceforge.net Sun Oct 16 19:42:15 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Sun, 16 Oct 2005 19:42:15 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools/setuptools/command easy_install.py, 1.31, 1.32 Message-ID: <20051016174215.9894A1E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15232/setuptools/command Modified Files: easy_install.py Log Message: Implement --no-deps option, add link to Ian Bicking's non-root Python builder script.
Index: easy_install.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command/easy_install.py,v retrieving revision 1.31 retrieving revision 1.32 diff -u -d -r1.31 -r1.32 --- easy_install.py 24 Sep 2005 20:29:57 -0000 1.31 +++ easy_install.py 16 Oct 2005 17:42:11 -0000 1.32 @@ -70,13 +70,13 @@ ('always-unzip', 'Z', "don't install as a zipfile, no matter what"), ('site-dirs=','S',"list of directories where .pth files work"), ('editable', 'e', "Install specified packages in editable form"), + ('no-deps', 'N', "don't install dependencies"), ] - boolean_options = [ 'zip-ok', 'multi-version', 'exclude-scripts', 'upgrade', 'always-copy', 'delete-conflicting', 'ignore-conflicts-at-my-risk', 'editable', + 'no-deps', ] - negative_opt = {'always-unzip': 'zip-ok'} create_index = PackageIndex @@ -89,7 +89,7 @@ self.args = None self.optimize = self.record = None self.upgrade = self.always_copy = self.multi_version = None - self.editable = None + self.editable = self.no_deps = None self.root = None # Options not specifiable via command line @@ -222,7 +222,7 @@ for link in self.find_links: self.package_index.scan_url(link) for spec in self.args: - self.easy_install(spec, True) + self.easy_install(spec, not self.no_deps) if self.record: outputs = self.outputs if self.root: # strip any package prefix From pje at users.sourceforge.net Sun Oct 16 19:42:15 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Sun, 16 Oct 2005 19:42:15 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools EasyInstall.txt, 1.62, 1.63 Message-ID: <20051016174215.C85F11E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15232 Modified Files: EasyInstall.txt Log Message: Implement --no-deps option, add link to Ian Bicking's non-root Python builder script. 
Index: EasyInstall.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/EasyInstall.txt,v retrieving revision 1.62 retrieving revision 1.63 diff -u -d -r1.62 -r1.63 --- EasyInstall.txt 24 Sep 2005 19:45:20 -0000 1.62 +++ EasyInstall.txt 16 Oct 2005 17:42:11 -0000 1.63 @@ -685,6 +685,11 @@ directory, thereby allowing individual users to install their own Python packages via EasyInstall. +``--no-deps, -N`` (New in 0.6a6) + Don't install any dependencies. This is intended as a convenience for + tools that wrap eggs in a platform-specific packaging system. (We don't + recommend that you use it for anything else.) + Non-Root Installation --------------------- @@ -714,7 +719,11 @@ If you are on a Linux, BSD, Cygwin, or other similar Unix-like operating system, you should create a ``~/lib/python2.x/site-packages`` directory -instead. You will need to know your Python version's ``sys.prefix`` and +instead. (Note: Ian Bicking has created a script that can automate most of the +process that follows; see http://svn.colorstudy.com/home/ianb/non_root_python.py +for details.) + +You will need to know your Python version's ``sys.prefix`` and ``sys.exec_prefix``, which you can find out by running:: python -c "import sys; print sys.prefix; print sys.exec_prefix" @@ -732,7 +741,7 @@ rm ~/lib/python2.4/site-packages mkdir ~/lib/python2.4/site-packages ln -s /usr/local/lib/python2.4/site-packages/* ~/lib/python2.4/site-packages - mkdir ~/include/python2.4 + mkdir -p ~/include/python2.4 ln -s /usr/local/include/python2.4/* ~/include/python2.4 If your ``sys.exec_prefix`` was different from your ``sys.prefix``, you will @@ -768,15 +777,8 @@ * There's no automatic retry for borked Sourceforge mirrors, which can easily time out or be missing a file. - * Wrapping ``easy_install.py`` with the Exemaker utility may cause failures - when building packages that want to compile themselves with optimization - enabled. 
This is because Exemaker sets ``sys.executable`` to point to the - ``easy_install`` wrapper, instead of to the Python executable, and the - ``distutils.util.byte_compile()`` function expects to be able to invoke - ``sys.executable`` to run a short Python script. Unfortunately, this can't - be directly fixed by EasyInstall; it has to be fixed in the distutils or - in Exemaker. So, don't use Exemaker to wrap ``easy_install.py``, or at any - rate don't expect it to work with all packages. +0.6a6 + * Added ``--no-deps`` option. 0.6a3 * Improved error message when trying to use old ways of running From pje at users.sourceforge.net Sun Oct 16 22:45:34 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Sun, 16 Oct 2005 22:45:34 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools/setuptools cli.exe, 1.1, 1.2 gui.exe, 1.1, 1.2 Message-ID: <20051016204534.1ABD61E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19978/setuptools Modified Files: cli.exe gui.exe Log Message: Fix problem with Windows console scripts conflicting with module names, thereby confusing the import process. Scripts are now generated with a suffix of the form '-script.py' to avoid conflicts. (The .exe's are still generated without the '-script' part, so you don't have to type it.) Thanks to Matthew R. Scott for reporting the problem. 
Index: cli.exe =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/cli.exe,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 Binary files /tmp/cvsCUASHv and /tmp/cvsSTFSh5 differ Index: gui.exe =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/gui.exe,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 Binary files /tmp/cvscx0JFD and /tmp/cvsKUHbud differ From pje at users.sourceforge.net Sun Oct 16 22:45:34 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Sun, 16 Oct 2005 22:45:34 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools EasyInstall.txt, 1.63, 1.64 launcher.c, 1.2, 1.3 Message-ID: <20051016204534.2D65A1E4004@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19978 Modified Files: EasyInstall.txt launcher.c Log Message: Fix problem with Windows console scripts conflicting with module names, thereby confusing the import process. Scripts are now generated with a suffix of the form '-script.py' to avoid conflicts. (The .exe's are still generated without the '-script' part, so you don't have to type it.) Thanks to Matthew R. Scott for reporting the problem. Index: EasyInstall.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/EasyInstall.txt,v retrieving revision 1.63 retrieving revision 1.64 diff -u -d -r1.63 -r1.64 --- EasyInstall.txt 16 Oct 2005 17:42:11 -0000 1.63 +++ EasyInstall.txt 16 Oct 2005 20:45:28 -0000 1.64 @@ -780,6 +780,9 @@ 0.6a6 * Added ``--no-deps`` option. + * Improved Windows ``.exe`` script wrappers so that the script can have the + same name as a module without confusing Python. 
+ 0.6a3 * Improved error message when trying to use old ways of running ``easy_install``. Removed the ability to run via ``python -m`` or by Index: launcher.c =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/launcher.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- launcher.c 24 Sep 2005 20:29:57 -0000 1.2 +++ launcher.c 16 Oct 2005 20:45:29 -0000 1.3 @@ -12,8 +12,8 @@ To build/rebuild with mingw32, do this in the setuptools project directory: - gcc -DGUI=0 -mno-cygwin -O -s -o setuptools/cli.exe launcher.c - gcc -DGUI=1 -mwindows -mno-cygwin -O -s -o setuptools/gui.exe launcher.c + gcc -DGUI=0 -mno-cygwin -O -s -o setuptools/cli.exe launcher.c + gcc -DGUI=1 -mwindows -mno-cygwin -O -s -o setuptools/gui.exe launcher.c It links to msvcrt.dll, but this shouldn't be a problem since it doesn't actually run Python in the same process. Note that using 'exec' instead @@ -55,7 +55,8 @@ end = script + strlen(script); while( end>script && *end != '.') *end-- = '\0'; - strcat(script, (GUI ? "pyw" : "py")); + *end-- = '\0'; + strcat(script, (GUI ? "-script.pyw" : "-script.py")); /* figure out the target python executable */ @@ -74,7 +75,6 @@ *ptr = '\0'; while (ptr>python && isspace(*ptr)) *ptr-- = '\0'; /* strip trailing sp */ - if (strncmp(python, "#!", 2)) { /* default to python.exe if no #! 
header */ strcpy(python, "#!python.exe"); From pje at users.sourceforge.net Sun Oct 16 22:45:34 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Sun, 16 Oct 2005 22:45:34 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools/setuptools/command easy_install.py, 1.32, 1.33 Message-ID: <20051016204534.438F71E4007@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19978/setuptools/command Modified Files: easy_install.py Log Message: Fix problem with Windows console scripts conflicting with module names, thereby confusing the import process. Scripts are now generated with a suffix of the form '-script.py' to avoid conflicts. (The .exe's are still generated without the '-script' part, so you don't have to type it.) Thanks to Matthew R. Scott for reporting the problem. Index: easy_install.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command/easy_install.py,v retrieving revision 1.32 retrieving revision 1.33 diff -u -d -r1.32 -r1.33 --- easy_install.py 16 Oct 2005 17:42:11 -0000 1.32 +++ easy_install.py 16 Oct 2005 20:45:30 -0000 1.33 @@ -102,13 +102,13 @@ def delete_blockers(self, blockers): for filename in blockers: - log.info("Deleting %s", filename) - if not self.dry_run: - if os.path.isdir(filename) and not os.path.islink(filename): - shutil.rmtree(filename) - else: - os.unlink(filename) - + if os.path.exists(filename) or os.path.islink(filename): + log.info("Deleting %s", filename) + if not self.dry_run: + if os.path.isdir(filename) and not os.path.islink(filename): + shutil.rmtree(filename) + else: + os.unlink(filename) @@ -464,18 +464,21 @@ " load_entry_point(%(spec)r, %(group)r, %(name)r)()\n" ")\n" ) % locals() - if sys.platform=='win32': # On Windows, add a .py extension and an .exe launcher if group=='gui_scripts': - ext, 
launcher = '.pyw', 'gui.exe' + ext, launcher = '-script.pyw', 'gui.exe' + old = ['.pyw'] new_header = re.sub('(?i)python.exe','pythonw.exe',header) else: - ext, launcher = '.py', 'cli.exe' + ext, launcher = '-script.py', 'cli.exe' + old = ['.py','.pyc','.pyo'] new_header = re.sub('(?i)pythonw.exe','pythonw.exe',header) - if os.path.exists(new_header[2:-1]): header = new_header + + self.delete_blockers( # clean up old .py/.pyw w/o a script + [os.path.join(self.script_dir,name+x) for x in old]) self.write_script(name+ext, header+script_text) self.write_script( @@ -487,9 +490,6 @@ # write the stub with no extension. self.write_script(name, header+script_text) - - - def install_script(self, dist, script_name, script_text, dev_path=None): """Generate a legacy script wrapper and install it""" spec = str(dist.as_requirement()) From pje at users.sourceforge.net Mon Oct 17 04:26:44 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Mon, 17 Oct 2005 04:26:44 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools site.py, NONE, 1.1 virtual-python.py, NONE, 1.1 EasyInstall.txt, 1.64, 1.65 api_tests.txt, 1.6, 1.7 pkg_resources.py, 1.73, 1.74 pkg_resources.txt, 1.16, 1.17 setup.py, 1.44, 1.45 Message-ID: <20051017022644.6D37A1E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16625 Modified Files: EasyInstall.txt api_tests.txt pkg_resources.py pkg_resources.txt setup.py Added Files: site.py virtual-python.py Log Message: Significantly enhanced support and docs for "non-root" installation, including both "virtual" and PYTHONPATH-based installs. The activation precedence of distributions has also changed so that PYTHONPATH-based non-root installs can include eggs that override system-defined packages (whether managed or unmanaged). This version should eliminate most common installation complaints from non-root Python users. 
Note: this version includes a hacked 'site.py' to support processing .pth files in directories that come *before* site-packages on sys.path. However, because of its placement, it should only come into play when a user puts the setuptools .egg file *directly* on PYTHONPATH, so it doesn't affect "virtual" or "root" installations. It's strictly to provide support for luddites who refuse to give up their existing non-root PYTHONPATH setup unless you pry it from their cold, dead hands. :) --- NEW FILE: site.py --- """Append module search paths for third-party packages to sys.path. **************************************************************** * This module is automatically imported during initialization, * * if you add the setuptools egg to PYTHONPATH (to support the * * simple non-root installation mode) * **************************************************************** In earlier versions of Python (up to 1.5a3), scripts or modules that needed to use site-specific modules would place ``import site'' somewhere near the top of their code. Because of the automatic import, this is no longer necessary (but code that does it still works). This will append site-specific paths to the module search path. On Unix, it starts with sys.prefix and sys.exec_prefix (if different) and appends lib/python/site-packages as well as lib/site-python. On other platforms (mainly Mac and Windows), it uses just sys.prefix (and sys.exec_prefix, if different, but this is unlikely). The resulting directories, if they exist, are appended to sys.path, and also inspected for path configuration files. A path configuration file is a file whose name has the form .pth; its contents are additional directories (one per line) to be added to sys.path. Non-existing directories (or non-directories) are never added to sys.path; no directory is added to sys.path more than once. Blank lines and lines beginning with '#' are skipped. Lines starting with 'import' are executed. 
For example, suppose sys.prefix and sys.exec_prefix are set to /usr/local and there is a directory /usr/local/lib/python1.5/site-packages with three subdirectories, foo, bar and spam, and two path configuration files, foo.pth and bar.pth. Assume foo.pth contains the following: # foo package configuration foo bar bletch and bar.pth contains: # bar package configuration bar Then the following directories are added to sys.path, in this order: /usr/local/lib/python1.5/site-packages/bar /usr/local/lib/python1.5/site-packages/foo Note that bletch is omitted because it doesn't exist; bar precedes foo because bar.pth comes alphabetically before foo.pth; and spam is omitted because it is not mentioned in either path configuration file. After these path manipulations, an attempt is made to import a module named sitecustomize, which can perform arbitrary additional site-specific customizations. If this import fails with an ImportError exception, it is silently ignored. """ import sys import os import __builtin__ def makepath(*paths): dir = os.path.abspath(os.path.join(*paths)) return dir, os.path.normcase(dir) def abs__file__(): """Set all module' __file__ attribute to an absolute path""" for m in sys.modules.values(): try: m.__file__ = os.path.abspath(m.__file__) except AttributeError: continue def removeduppaths(): """ Remove duplicate entries from sys.path along with making them absolute""" # This ensures that the initial path provided by the interpreter contains # only absolute pathnames, even if we're running from the build directory. L = [] known_paths = {} for dir in sys.path: # Filter out duplicate paths (on case-insensitive file systems also # if they only differ in case); turn relative paths into absolute # paths. dir, dircase = makepath(dir) if not dircase in known_paths: L.append(dir) known_paths[dircase] = 1 sys.path[:] = L return known_paths # XXX This should not be part of site.py, since it is needed even when # using the -S option for Python. 
See http://www.python.org/sf/586680 def addbuilddir(): """Append ./build/lib. in case we're running in the build dir (especially for Guido :-)""" from distutils.util import get_platform s = "build/lib.%s-%.3s" % (get_platform(), sys.version) s = os.path.join(os.path.dirname(sys.path[-1]), s) sys.path.append(s) def _init_pathinfo(): """Return a set containing all existing directory entries from sys.path""" d = {} for dir in sys.path: try: if os.path.isdir(dir): dir, dircase = makepath(dir) d[dircase] = 1 except TypeError: continue return d def addpackage(sitedir, name, known_paths): """Add a new path to known_paths by combining sitedir and 'name' or execute sitedir if it starts with 'import'""" if known_paths is None: known_paths = _init_pathinfo() reset = 1 else: reset = 0 fullname = os.path.join(sitedir, name) try: f = open(fullname, "rU") except IOError: return try: for line in f: if line.startswith("#"): continue if line.startswith("import"): exec line continue line = line.rstrip() dir, dircase = makepath(sitedir, line) if not dircase in known_paths and os.path.exists(dir): sys.path.append(dir) known_paths[dircase] = 1 finally: f.close() if reset: known_paths = None return known_paths def addsitedir(sitedir, known_paths=None): """Add 'sitedir' argument to sys.path if missing and handle .pth files in 'sitedir'""" if known_paths is None: known_paths = _init_pathinfo() reset = 1 else: reset = 0 sitedir, sitedircase = makepath(sitedir) if not sitedircase in known_paths: sys.path.append(sitedir) # Add path component try: names = os.listdir(sitedir) except os.error: return names.sort() for name in names: if name.endswith(os.extsep + "pth"): addpackage(sitedir, name, known_paths) if reset: known_paths = None return known_paths def addsitepackages(known_paths): """Add site-packages (and possibly site-python) to sys.path""" prefixes = [sys.prefix] if sys.exec_prefix != sys.prefix: prefixes.append(sys.exec_prefix) for prefix in prefixes: if prefix: if sys.platform in 
('os2emx', 'riscos'): sitedirs = [os.path.join(prefix, "Lib", "site-packages")] elif os.sep == '/': sitedirs = [os.path.join(prefix, "lib", "python" + sys.version[:3], "site-packages"), os.path.join(prefix, "lib", "site-python")] else: sitedirs = [prefix, os.path.join(prefix, "lib", "site-packages")] if sys.platform == 'darwin': # for framework builds *only* we add the standard Apple # locations. Currently only per-user, but /Library and # /Network/Library could be added too if 'Python.framework' in prefix: home = os.environ.get('HOME') if home: sitedirs.append( os.path.join(home, 'Library', 'Python', sys.version[:3], 'site-packages')) for sitedir in sys.path: if sitedir and os.path.isdir(sitedir): addsitedir(sitedir, known_paths) return None def setBEGINLIBPATH(): """The OS/2 EMX port has optional extension modules that do double duty as DLLs (and must use the .DLL file extension) for other extensions. The library search path needs to be amended so these will be found during module import. Use BEGINLIBPATH so that these are at the start of the library search path. """ dllpath = os.path.join(sys.prefix, "Lib", "lib-dynload") libpath = os.environ['BEGINLIBPATH'].split(';') if libpath[-1]: libpath.append(dllpath) else: libpath[-1] = dllpath os.environ['BEGINLIBPATH'] = ';'.join(libpath) def setquit(): """Define new built-ins 'quit' and 'exit'. These are simply strings that display a hint on how to exit. """ if os.sep == ':': exit = 'Use Cmd-Q to quit.' elif os.sep == '\\': exit = 'Use Ctrl-Z plus Return to exit.' else: exit = 'Use Ctrl-D (i.e. EOF) to exit.' 
__builtin__.quit = __builtin__.exit = exit class _Printer(object): """interactive prompt objects for printing the license text, a list of contributors and the copyright notice.""" MAXLINES = 23 def __init__(self, name, data, files=(), dirs=()): self.__name = name self.__data = data self.__files = files self.__dirs = dirs self.__lines = None def __setup(self): if self.__lines: return data = None for dir in self.__dirs: for filename in self.__files: filename = os.path.join(dir, filename) try: fp = file(filename, "rU") data = fp.read() fp.close() break except IOError: pass if data: break if not data: data = self.__data self.__lines = data.split('\n') self.__linecnt = len(self.__lines) def __repr__(self): self.__setup() if len(self.__lines) <= self.MAXLINES: return "\n".join(self.__lines) else: return "Type %s() to see the full %s text" % ((self.__name,)*2) def __call__(self): self.__setup() prompt = 'Hit Return for more, or q (and Return) to quit: ' lineno = 0 while 1: try: for i in range(lineno, lineno + self.MAXLINES): print self.__lines[i] except IndexError: break else: lineno += self.MAXLINES key = None while key is None: key = raw_input(prompt) if key not in ('', 'q'): key = None if key == 'q': break def setcopyright(): """Set 'copyright' and 'credits' in __builtin__""" __builtin__.copyright = _Printer("copyright", sys.copyright) if sys.platform[:4] == 'java': __builtin__.credits = _Printer( "credits", "Jython is maintained by the Jython developers (www.jython.org).") else: __builtin__.credits = _Printer("credits", """\ Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands for supporting Python development. See www.python.org for more information.""") here = os.path.dirname(os.__file__) __builtin__.license = _Printer( "license", "See http://www.python.org/%.3s/license.html" % sys.version, ["LICENSE.txt", "LICENSE"], [os.path.join(here, os.pardir), here, os.curdir]) class _Helper(object): """Define the built-in 'help'. 
This is a wrapper around pydoc.help (with a twist). """ def __repr__(self): return "Type help() for interactive help, " \ "or help(object) for help about object." def __call__(self, *args, **kwds): import pydoc return pydoc.help(*args, **kwds) def sethelper(): __builtin__.help = _Helper() def aliasmbcs(): """On Windows, some default encodings are not provided by Python, while they are always available as "mbcs" in each locale. Make them usable by aliasing to "mbcs" in such a case.""" if sys.platform == 'win32': import locale, codecs enc = locale.getdefaultlocale()[1] if enc.startswith('cp'): # "cp***" ? try: codecs.lookup(enc) except LookupError: import encodings encodings._cache[enc] = encodings._unknown encodings.aliases.aliases[enc] = 'mbcs' def setencoding(): """Set the string encoding used by the Unicode implementation. The default is 'ascii', but if you're willing to experiment, you can change this.""" encoding = "ascii" # Default value set by _PyUnicode_Init() if 0: # Enable to support locale aware default string encodings. import locale loc = locale.getdefaultlocale() if loc[1]: encoding = loc[1] if 0: # Enable to switch off string to Unicode coercion and implicit # Unicode to string conversion. encoding = "undefined" if encoding != "ascii": # On Non-Unicode builds this will raise an AttributeError... sys.setdefaultencoding(encoding) # Needs Python Unicode build ! def execsitecustomize(): """Run custom site specific code, if available.""" try: import sitecustomize except ImportError: pass def main(): abs__file__() paths_in_sys = removeduppaths() if (os.name == "posix" and sys.path and os.path.basename(sys.path[-1]) == "Modules"): addbuilddir() paths_in_sys = addsitepackages(paths_in_sys) if sys.platform == 'os2emx': setBEGINLIBPATH() setquit() setcopyright() sethelper() aliasmbcs() setencoding() execsitecustomize() # Remove sys.setdefaultencoding() so that users cannot change the # encoding after initialization. 
The test for presence is needed when # this module is run as a script, because this code is executed twice. if hasattr(sys, "setdefaultencoding"): del sys.setdefaultencoding main() def _test(): print "sys.path = [" for dir in sys.path: print " %r," % (dir,) print "]" if __name__ == '__main__': _test() --- NEW FILE: virtual-python.py --- """Create a "virtual" Python installation Based on a script created by Ian Bicking.""" import sys, os, optparse, shutil join = os.path.join py_version = 'python%s.%s' % (sys.version_info[0], sys.version_info[1]) def mkdir(path): if not os.path.exists(path): print 'Creating %s' % path os.makedirs(path) else: if verbose: print 'Directory %s already exists' def symlink(src, dest): if not os.path.exists(dest): if verbose: print 'Creating symlink %s' % dest os.symlink(src, dest) else: print 'Symlink %s already exists' % dest def rmtree(dir): if os.path.exists(dir): print 'Deleting tree %s' % dir shutil.rmtree(dir) else: if verbose: print 'Do not need to delete %s; already gone' % dir def make_exe(fn): if os.name == 'posix': oldmode = os.stat(fn).st_mode & 07777 newmode = (oldmode | 0555) & 07777 os.chmod(fn, newmode) if verbose: print 'Changed mode of %s to %s' % (fn, oct(newmode)) def main(): if os.name != 'posix': print "This script only works on Unix-like platforms, sorry." 
return parser = optparse.OptionParser() parser.add_option('-v', '--verbose', action='count', dest='verbose', default=0, help="Increase verbosity") parser.add_option('--prefix', dest="prefix", default='~', help="The base directory to install to (default ~)") parser.add_option('--clear', dest='clear', action='store_true', help="Clear out the non-root install and start from scratch") parser.add_option('--no-site-packages', dest='no_site_packages', action='store_true', help="Don't copy the contents of the global site-packages dir to the " "non-root site-packages") options, args = parser.parse_args() global verbose home_dir = os.path.expanduser(options.prefix) lib_dir = join(home_dir, 'lib', py_version) inc_dir = join(home_dir, 'include', py_version) bin_dir = join(home_dir, 'bin') if sys.executable.startswith(bin_dir): print 'Please use the *system* python to run this script' return verbose = options.verbose assert not args, "No arguments allowed" if options.clear: rmtree(lib_dir) rmtree(inc_dir) print 'Not deleting', bin_dir prefix = sys.prefix mkdir(lib_dir) stdlib_dir = join(prefix, 'lib', py_version) for fn in os.listdir(stdlib_dir): if fn != 'site-packages': symlink(join(stdlib_dir, fn), join(lib_dir, fn)) mkdir(join(lib_dir, 'site-packages')) if not options.no_site_packages: for fn in os.listdir(join(stdlib_dir, 'site-packages')): symlink(join(stdlib_dir, 'site-packages', fn), join(lib_dir, 'site-packages', fn)) mkdir(inc_dir) stdinc_dir = join(prefix, 'include', py_version) for fn in os.listdir(stdinc_dir): symlink(join(stdinc_dir, fn), join(inc_dir, fn)) if sys.exec_prefix != sys.prefix: exec_dir = join(sys.exec_prefix, 'lib', py_version) for fn in os.listdir(exec_dir): symlink(join(exec_dir, fn), join(lib_dir, fn)) mkdir(bin_dir) print 'Copying %s to %s' % (sys.executable, bin_dir) py_executable = join(bin_dir, 'python') if sys.executable != py_executable: shutil.copyfile(sys.executable, py_executable) make_exe(py_executable) pydistutils = 
os.path.expanduser('~/.pydistutils.cfg') if os.path.exists(pydistutils): print 'Please make sure you remove any previous custom paths from' print "your", pydistutils, "file." print "You're now ready to download ez_setup.py, and run" print py_executable, "ez_setup.py" if __name__ == '__main__': main() Index: EasyInstall.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/EasyInstall.txt,v retrieving revision 1.64 retrieving revision 1.65 diff -u -d -r1.64 -r1.65 --- EasyInstall.txt 16 Oct 2005 20:45:28 -0000 1.64 +++ EasyInstall.txt 17 Oct 2005 02:26:39 -0000 1.65 @@ -25,6 +25,8 @@ ==================== +.. _installation instructions: + Installing "Easy Install" ------------------------- @@ -718,12 +720,46 @@ now or in the future.) If you are on a Linux, BSD, Cygwin, or other similar Unix-like operating -system, you should create a ``~/lib/python2.x/site-packages`` directory -instead. (Note: Ian Bicking has created a script that can automate most of the -process that follows; see http://svn.colorstudy.com/home/ianb/non_root_python.py -for details.) +system, you have a couple of different options. You can create a "virtual" +Python installation, which uses its own library directories and some symlinks +to the site-wide Python. Or, you can use a "traditional" ``PYTHONPATH``-based +installation, which isn't as flexible, but which you may find more familiar, +especially if you already have a custom ``PYTHONPATH`` set up. -You will need to know your Python version's ``sys.prefix`` and + +Creating a "Virtual" Python +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the simplest case, your virtual Python installation will live under the +``~/lib/python2.x``, ``~/include/python2.x``, and ``~/bin`` directories. Just +download `virtual-python.py`_ and run it using the site-wide Python. 
If you +want to customize the location, you can use the ``--prefix`` option to specify +an installation base directory in place of ``~``. (Use ``--help`` to get the +complete list of options.) + +.. _virtual-python.py: http://peak.telecommunity.com/dist/virtual-python.py + +When you're done, you'll have a ``~/bin/python`` executable that's linked to +the local Python installation and inherits all its current libraries, but which +allows you to add as many new libraries as you want. Simply use this new +Python in place of your system-defined one, and you can modify it as you like +without breaking anything that relies on the system Python. You'll also still +need to follow the standard `installation instructions`_ to install setuptools +and EasyInstall, using your new ``~/bin/python`` executable in place of the +system Python. + +Note that if you were previously setting a ``PYTHONPATH`` and/or had other +special configuration options in your ``~/.pydistutils.cfg``, you may need to +remove these settings *before* running ``virtual-python.py``. You should +also make sure that the ``~/bin`` directory (or whatever directory you choose) +is on your ``PATH``, because that is where EasyInstall will install new Python +scripts. + +If you'd prefer to do the installation steps by hand, or just want to know what +the script will do, here are the steps. (If you don't care how it works, you +can just skip the rest of this section.) + +First, you will need to know your Python version's ``sys.prefix`` and ``sys.exec_prefix``, which you can find out by running:: python -c "import sys; print sys.prefix; print sys.exec_prefix" @@ -761,13 +797,43 @@ Do NOT use a symlink! The Python binary must be copied or hardlinked, otherwise it will use the system ``site-packages`` directory and not yours. 
-Note that if you were previously setting a ``PYTHONPATH`` and/or had other -special configuration options in your ``~/.pydistutils.cfg``, you may need to -remove these settings and relocate any older installed modules to your -new ``~/lib/python2.x/site-packages`` directory. Also note that you must now -make sure to use the ``~/bin/python`` executable instead of the system Python, -and ideally you should put the ``~/bin`` directory first on your ``PATH`` as -well, because that is where EasyInstall will install new Python scripts. +You can now proceed with the standard `installation instructions`_. + + +"Traditional" ``PYTHONPATH``-based Installation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This installation method is not as robust or as flexible as `creating a +"virtual" python`_ installation, as it uses various tricks to fool Python into +processing ``.pth`` files where it normally wouldn't. We suggest you try the +virtual Python approach first, as we are providing this method mainly for +people who just can't get past their unshakeable belief that creating a virtual +python is somehow "Not Right", or that putting stuff on ``PYTHONPATH`` "Should +Just Work, Darnit." So, if you're not one of those people, you don't +need these instructions. :-) + +Assuming that you want to install packages in a directory called ``~/py-lib``, +and scripts in ``~/bin``, here's what you need to do: + +First, edit ``~/.pydistutils.cfg`` to include these settings:: + + [install] + install_lib = ~/py-lib + install_scripts = ~/bin + + [easy_install] + site_dirs = ~/py-lib + +Be sure to do this *before* you try to run the ``ez_setup.py`` installation +script. Then, follow the standard `installation instructions`_, but take +careful note of the full pathname of the ``.egg`` file that gets installed, so +that you can add it to your ``PYTHONPATH``, along with ``~/py-lib``.
+ +You *must* add the setuptools egg file to your ``PYTHONPATH`` manually, or it +will not work, and neither will any other packages you install with +EasyInstall. You will not, however, have to manually add any other +packages to the ``PYTHONPATH``; EasyInstall will take care of them for you, as +long as the setuptools egg is explicitly listed in ``PYTHONPATH``. Release Notes/Change History @@ -778,6 +844,12 @@ time out or be missing a file. 0.6a6 + * Added support for "traditional" PYTHONPATH-based non-root installation, and + also the convenient ``virtual-python.py`` script, based on a contribution + by Ian Bicking. The setuptools egg now contains a hacked ``site`` module + that makes the PYTHONPATH-based approach work with .pth files, so that you + can get the full EasyInstall feature set on such installations. + * Added ``--no-deps`` option. * Improved Windows ``.exe`` script wrappers so that the script can have the Index: api_tests.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/api_tests.txt,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- api_tests.txt 13 Aug 2005 23:04:08 -0000 1.6 +++ api_tests.txt 17 Oct 2005 02:26:39 -0000 1.7 @@ -182,12 +182,12 @@ ['...example.com...', '...pkg_resources...', '...pkg_resources...'] And you can specify the path entry a distribution was found under, using the -optional second parameter to ``add()`` +optional second parameter to ``add()``:: + >>> ws = WorkingSet([]) >>> ws.add(dist,"foo") - >>> ws.add(dist,"bar") >>> ws.entries - ['http://example.com/something', ..., 'foo', 'bar'] + ['foo'] But even if a distribution is found under multiple path entries, it still only shows up once when iterating the working set: @@ -222,14 +222,14 @@ >>> def added(dist): print "Added", dist >>> ws.subscribe(added) Added Bar 0.9 - >>> foo12 = Distribution(project_name="Foo", version="1.2") + >>> foo12 = 
Distribution(project_name="Foo", version="1.2", location="f12") >>> ws.add(foo12) Added Foo 1.2 Note, however, that only the first distribution added for a given project name will trigger a callback, even during the initial ``subscribe()`` callback:: - >>> foo14 = Distribution(project_name="Foo", version="1.4") + >>> foo14 = Distribution(project_name="Foo", version="1.4", location="f14") >>> ws.add(foo14) # no callback, because Foo 1.2 is already active >>> ws = WorkingSet([]) Index: pkg_resources.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.py,v retrieving revision 1.73 retrieving revision 1.74 diff -u -d -r1.73 -r1.74 --- pkg_resources.py 26 Sep 2005 00:35:35 -0000 1.73 +++ pkg_resources.py 17 Oct 2005 02:26:39 -0000 1.74 @@ -356,7 +356,7 @@ self.entry_keys.setdefault(entry, []) self.entries.append(entry) for dist in find_distributions(entry, True): - self.add(dist, entry) + self.add(dist, entry, False) def __contains__(self,dist): @@ -421,7 +421,7 @@ seen[key]=1 yield self.by_key[key] - def add(self, dist, entry=None): + def add(self, dist, entry=None, insert=True): """Add `dist` to working set, associated with `entry` If `entry` is unspecified, it defaults to the ``.location`` of `dist`. @@ -432,23 +432,23 @@ doesn't already have a distribution in the set. If it's added, any callbacks registered with the ``subscribe()`` method will be called. 
""" + if insert: + dist.insert_on(self.entries, entry) + if entry is None: entry = dist.location - - if entry not in self.entry_keys: - self.entries.append(entry) - self.entry_keys[entry] = [] + keys = self.entry_keys.setdefault(entry,[]) if dist.key in self.by_key: return # ignore hidden distros self.by_key[dist.key] = dist - keys = self.entry_keys[entry] if dist.key not in keys: keys.append(dist.key) self._added_new(dist) + def resolve(self, requirements, env=None, installer=None): """List all distributions needed to (recursively) meet `requirements` @@ -1837,12 +1837,12 @@ def activate(self,path=None): """Ensure distribution is importable on `path` (default=sys.path)""" if path is None: path = sys.path - if self.location not in path: - path.append(self.location) + self.insert_on(path) if path is sys.path: fixup_namespace_packages(self.location) map(declare_namespace, self._get_metadata('namespace_packages.txt')) + def egg_name(self): """Return what this distribution's standard .egg filename should be""" filename = "%s-%s-py%s" % ( @@ -1907,6 +1907,47 @@ """Return the EntryPoint object for `group`+`name`, or ``None``""" return self.get_entry_map(group).get(name) + def insert_on(self, path, loc = None): + """Insert self.location in path before its nearest parent directory""" + loc = loc or self.location + if not loc: return + if path is sys.path: + self.check_version_conflict() + best, pos = 0, -1 + for p,item in enumerate(path): + if loc.startswith(item) and len(item)>best and loc<>item: + best, pos = len(item), p + if pos==-1: + if loc not in path: path.append(loc) + elif loc not in path[:pos+1]: + while loc in path: path.remove(loc) + path.insert(pos,loc) + + + + def check_version_conflict(self): + if self.key=='setuptools': + return # ignore the inevitable setuptools self-conflicts :( + + nsp = dict.fromkeys(self._get_metadata('namespace_packages.txt')) + + for modname in self._get_metadata('top_level.txt'): + if modname not in sys.modules or modname in nsp: + 
continue + + fn = getattr(sys.modules[modname], '__file__', None) + if fn and fn.startswith(self.location): + continue + + from warnings import warn + warn( + "Module %s was already imported from %s, but %s is being added" + " to sys.path" % (modname, fn, self.location) + ) + + + + @@ -2165,9 +2206,9 @@ add_activation_listener = working_set.subscribe run_script = working_set.run_script run_main = run_script # backward compatibility - # Activate all distributions already on sys.path, and ensure that # all distributions added to the working set in the future (e.g. by # calling ``require()``) will get activated as well. add_activation_listener(lambda dist: dist.activate()) +working_set.entries=[]; map(working_set.add_entry,sys.path) # match order Index: pkg_resources.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.txt,v retrieving revision 1.16 retrieving revision 1.17 diff -u -d -r1.16 -r1.17 --- pkg_resources.txt 26 Sep 2005 00:35:35 -0000 1.16 +++ pkg_resources.txt 17 Oct 2005 02:26:39 -0000 1.17 @@ -1488,6 +1488,17 @@ Release Notes/Change History ---------------------------- +0.6a6 + * Activated distributions are now inserted in ``sys.path`` (and the working + set) just before the directory that contains them, instead of at the end. + This allows e.g. eggs in ``site-packages`` to override unmanaged modules in + the same location, and allows eggs found earlier on ``sys.path`` to override + ones found later. + + * When a distribution is activated, it now checks whether any contained + non-namespace modules have already been imported and issues a warning if + a conflicting module has already been imported. + 0.6a4 * Fix a bug in ``WorkingSet.resolve()`` that was introduced in 0.6a3.
Index: setup.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setup.py,v retrieving revision 1.44 retrieving revision 1.45 diff -u -d -r1.44 -r1.45 --- setup.py 29 Sep 2005 17:05:43 -0000 1.44 +++ setup.py 17 Oct 2005 02:26:39 -0000 1.45 @@ -37,7 +37,7 @@ test_suite = 'setuptools.tests.test_suite', packages = find_packages(), package_data = {'setuptools': ['*.exe']}, - py_modules = ['pkg_resources', 'easy_install'], + py_modules = ['pkg_resources', 'easy_install', 'site'], zip_safe = False, # We want 'python -m easy_install' to work, for now :( entry_points = { From pje at users.sourceforge.net Mon Oct 17 04:39:03 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Mon, 17 Oct 2005 04:39:03 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools site.py, 1.1, 1.2 Message-ID: <20051017023903.E1E551E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19037 Modified Files: site.py Log Message: Fix a typo in patched site.py. 
Index: site.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/site.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- site.py 17 Oct 2005 02:26:39 -0000 1.1 +++ site.py 17 Oct 2005 02:39:00 -0000 1.2 @@ -200,7 +200,7 @@ 'Python', sys.version[:3], 'site-packages')) - for sitedir in sys.path: + for sitedir in sys.path+sitedirs: if sitedir and os.path.isdir(sitedir): addsitedir(sitedir, known_paths) return None From pje at users.sourceforge.net Mon Oct 17 04:44:42 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Mon, 17 Oct 2005 04:44:42 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools/setuptools __init__.py, 1.27, 1.28 Message-ID: <20051017024442.F2DA21E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19686/setuptools Modified Files: __init__.py Log Message: Prep for 0.6a6 release. 
Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/__init__.py,v retrieving revision 1.27 retrieving revision 1.28 diff -u -d -r1.27 -r1.28 --- __init__.py 29 Sep 2005 17:05:43 -0000 1.27 +++ __init__.py 17 Oct 2005 02:44:39 -0000 1.28 @@ -8,7 +8,7 @@ from distutils.util import convert_path import os.path -__version__ = '0.6a5' +__version__ = '0.6a6' __all__ = [ 'setup', 'Distribution', 'Feature', 'Command', 'Extension', 'Require', 'find_packages' From pje at users.sourceforge.net Mon Oct 17 04:44:43 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Mon, 17 Oct 2005 04:44:43 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools ez_setup.py, 1.34, 1.35 setup.py, 1.45, 1.46 setuptools.txt, 1.41, 1.42 Message-ID: <20051017024443.2885A1E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19686 Modified Files: ez_setup.py setup.py setuptools.txt Log Message: Prep for 0.6a6 release. Index: ez_setup.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/ez_setup.py,v retrieving revision 1.34 retrieving revision 1.35 diff -u -d -r1.34 -r1.35 --- ez_setup.py 29 Sep 2005 17:05:43 -0000 1.34 +++ ez_setup.py 17 Oct 2005 02:44:39 -0000 1.35 @@ -14,7 +14,7 @@ This file can also be run as a script to install or upgrade setuptools. 
""" import sys -DEFAULT_VERSION = "0.6a5" +DEFAULT_VERSION = "0.6a6" DEFAULT_URL = "http://cheeseshop.python.org/packages/%s/s/setuptools/" % sys.version[:3] md5_data = { Index: setup.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setup.py,v retrieving revision 1.45 retrieving revision 1.46 diff -u -d -r1.45 -r1.46 --- setup.py 17 Oct 2005 02:26:39 -0000 1.45 +++ setup.py 17 Oct 2005 02:44:39 -0000 1.46 @@ -15,7 +15,7 @@ f.close() return ''.join(lines) -VERSION = "0.6a5" +VERSION = "0.6a6" from setuptools import setup, find_packages import sys from setuptools.command import __all__ as SETUP_COMMANDS Index: setuptools.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools.txt,v retrieving revision 1.41 retrieving revision 1.42 diff -u -d -r1.41 -r1.42 --- setuptools.txt 29 Sep 2005 17:05:43 -0000 1.41 +++ setuptools.txt 17 Oct 2005 02:44:39 -0000 1.42 @@ -221,6 +221,13 @@ your build process; for example, extensions that process setup() arguments and turn them into EGG-INFO metadata files. + (Note: projects listed in ``setup_requires`` will NOT be automatically + installed on the system where the setup script is being run. They are + simply downloaded to the setup directory if they're not locally available + already. If you want them to be installed, as well as being available + when the setup script is run, you should add them to ``install_requires`` + **and** ``setup_requires``.) + ``namespace_packages`` A list of strings naming the project's "namespace packages". 
A namespace package is a package that may be split across multiple project From rhettinger at users.sourceforge.net Mon Oct 17 05:37:30 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Mon, 17 Oct 2005 05:37:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc developers.txt,1.18,1.19 Message-ID: <20051017033730.252B81E42B6@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28244 Modified Files: developers.txt Log Message: Drop permissions for Gregory K Johnson. Index: developers.txt =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/developers.txt,v retrieving revision 1.18 retrieving revision 1.19 diff -u -d -r1.18 -r1.19 --- developers.txt 16 Oct 2005 11:17:30 -0000 1.18 +++ developers.txt 17 Oct 2005 03:37:26 -0000 1.19 @@ -53,6 +53,10 @@ Permissions Dropped on Request ------------------------------ +- Per note from Andrew Kuchling, the permissions for Gregory K Johnson + and the Summer Of Code project are no longer needed. AMK will make + any future checkins directly. 16 Oct 2005 RDH + - Johannes Gijsbers sent a drop request. 27 July 2005 RDH - Flovis Bruynooghe sent a drop request. 
14 July 2005 RDH From ncoghlan at users.sourceforge.net Mon Oct 17 11:15:10 2005 From: ncoghlan at users.sourceforge.net (ncoghlan@users.sourceforge.net) Date: Mon, 17 Oct 2005 11:15:10 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep-0343.txt, 1.31, 1.32 pep-0000.txt, 1.349, 1.350 Message-ID: <20051017091510.A8BD11E4002@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17998 Modified Files: pep-0343.txt pep-0000.txt Log Message: Revert PEP 343 to draft status until Guido is happy with the update Index: pep-0343.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0343.txt,v retrieving revision 1.31 retrieving revision 1.32 diff -u -d -r1.31 -r1.32 --- pep-0343.txt 16 Oct 2005 07:35:50 -0000 1.31 +++ pep-0343.txt 17 Oct 2005 09:15:06 -0000 1.32 @@ -3,7 +3,7 @@ Version: $Revision$ Last-Modified: $Date$ Author: Guido van Rossum, Nick Coghlan -Status: Accepted +Status: Draft Type: Standards Track Content-Type: text/plain Created: 13-May-2005 @@ -16,7 +16,8 @@ The PEP has been approved in principle by the BDFL, but there are still a couple of implementation details to be worked out (see the - section on Open Issues). + section on Open Issues). It's been reverted to Draft status until + those issues have been settled to Guido's satisfaction. 
Author's Note Index: pep-0000.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0000.txt,v retrieving revision 1.349 retrieving revision 1.350 diff -u -d -r1.349 -r1.350 --- pep-0000.txt 16 Oct 2005 12:21:08 -0000 1.349 +++ pep-0000.txt 17 Oct 2005 09:15:06 -0000 1.350 @@ -66,7 +66,6 @@ SA 308 Conditional Expressions GvR, Hettinger SA 328 Imports: Multi-Line and Absolute/Relative Aahz SA 342 Coroutines via Enhanced Generators GvR, Eby - SA 343 The "with" Statement GvR, Coghlan Open PEPs (under consideration) @@ -101,6 +100,7 @@ S 337 Logging Usage in the Standard Library Dubner S 338 Executing modules inside packages with '-m' Coghlan S 341 Unifying try-except and try-finally Birkenfeld + S 343 The "with" Statement GvR, Coghlan S 344 Exception Chaining and Embedded Tracebacks Yee S 345 Metadata for Python Software Packages 1.2 Jones P 347 Migrating the Python CVS to Subversion von Löwis @@ -388,7 +388,7 @@ SR 340 Anonymous Block Statements GvR S 341 Unifying try-except and try-finally Birkenfeld SA 342 Coroutines via Enhanced Generators GvR, Eby - SA 343 Anonymous Block Redux and Generator Enhancements GvR + S 343 Anonymous Block Redux and Generator Enhancements GvR S 344 Exception Chaining and Embedded Tracebacks Yee S 345 Metadata for Python Software Packages 1.2 Jones SR 346 User Defined ("with") Statements Coghlan From jhylton at users.sourceforge.net Mon Oct 17 17:01:38 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Mon, 17 Oct 2005 17:01:38 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.122, 1.1.2.123 Message-ID: <20051017150138.9F7EC1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29562/Python Modified Files: Tag: ast-branch newcompile.c Log Message: Fix stack depth computation for yield (expression, not statement).
Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.122 retrieving revision 1.1.2.123 diff -u -d -r1.1.2.122 -r1.1.2.123 --- newcompile.c 16 Oct 2005 05:19:44 -0000 1.1.2.122 +++ newcompile.c 17 Oct 2005 15:01:34 -0000 1.1.2.123 @@ -1544,7 +1544,7 @@ case EXEC_STMT: return -3; case YIELD_VALUE: - return -1; + return 0; case POP_BLOCK: return 0; From jhylton at users.sourceforge.net Mon Oct 17 17:08:37 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Mon, 17 Oct 2005 17:08:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c,1.1.2.70,1.1.2.71 Message-ID: <20051017150837.782311E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29976/Python Modified Files: Tag: ast-branch ast.c Log Message: Fix handling of testlist_gexp. Created a single function that handles the several possible testlist variants, with a flag indicating whether a gexp is possible. Note that I don't understand why that Grammar allows testlist_gexp in some places but not others. Address several other XXX comments, including calls to free. Reindent a few functions to four spaces so that they can be read. 
Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/ast.c,v retrieving revision 1.1.2.70 retrieving revision 1.1.2.71 diff -u -d -r1.1.2.70 -r1.1.2.71 --- ast.c 16 Oct 2005 05:19:44 -0000 1.1.2.70 +++ ast.c 17 Oct 2005 15:08:33 -0000 1.1.2.71 @@ -35,7 +35,7 @@ static stmt_ty ast_for_stmt(struct compiling *, const node *); static asdl_seq *ast_for_suite(struct compiling *, const node *); static asdl_seq *ast_for_exprlist(struct compiling *, const node *, int); -static expr_ty ast_for_testlist(struct compiling *, const node *); +static expr_ty ast_for_testlist(struct compiling *, const node *, int); /* Note different signature for ast_for_call */ static expr_ty ast_for_call(struct compiling *, const node *, expr_ty); @@ -249,8 +249,9 @@ return Module(stmts); case eval_input: { expr_ty testlist_ast; - - testlist_ast = ast_for_testlist(&c, CHILD(n, 0)); + + /* XXX Why not gen_for here? */ + testlist_ast = ast_for_testlist(&c, CHILD(n, 0), 0); if (!testlist_ast) goto error; return Expression(testlist_ast); @@ -512,7 +513,11 @@ seq_for_testlist(struct compiling *c, const node *n) { /* testlist: test (',' test)* [','] */ - REQ(n, testlist); + assert(TYPE(n) == testlist + || TYPE(n) == listmaker + || TYPE(n) == testlist_gexp + || TYPE(n) == testlist_safe + ); asdl_seq *seq; expr_ty expression; int i; @@ -641,10 +646,12 @@ ast_error(CHILD(ch, 0), "assignment to None"); goto error; } - /* XXX check return value of Name call */ - asdl_seq_APPEND(args, - Name(NEW_IDENTIFIER(CHILD(ch, 0)), - Param, LINENO(ch))); + expr_ty name = Name(NEW_IDENTIFIER(CHILD(ch, 0)), + Param, LINENO(ch)); + if (!name) + goto error; + asdl_seq_APPEND(args, name); + } i += 2; /* the name and the comma */ break; @@ -955,7 +962,7 @@ listcomps = asdl_seq_new(n_fors); if (!listcomps) { - /* XXX free(elt); */ + free_expr(elt); return NULL; } @@ -970,13 +977,14 @@ t = ast_for_exprlist(c, CHILD(ch, 1), Store); if (!t) { 
asdl_seq_free(listcomps); - /* XXX free(elt); */ + free_expr(elt); return NULL; } - expression = ast_for_testlist(c, CHILD(ch, 3)); + expression = ast_for_testlist(c, CHILD(ch, 3), 0); if (!expression) { + asdl_seq_free(t); asdl_seq_free(listcomps); - /* XXX free(elt); */ + free_expr(elt); return NULL; } @@ -986,8 +994,10 @@ lc = comprehension(Tuple(t, Store, LINENO(ch)), expression, NULL); if (!lc) { + asdl_seq_free(t); asdl_seq_free(listcomps); - /* XXX free(elt); */ + free_expr(expression); + free_expr(elt); return NULL; } @@ -999,14 +1009,14 @@ n_ifs = count_list_ifs(ch); if (n_ifs == -1) { asdl_seq_free(listcomps); - /* XXX free(elt); */ + free_expr(elt); return NULL; } ifs = asdl_seq_new(n_ifs); if (!ifs) { asdl_seq_free(listcomps); - /* XXX free(elt); */ + free_expr(elt); return NULL; } @@ -1096,211 +1106,209 @@ static expr_ty ast_for_genexp(struct compiling *c, const node *n) { - /* testlist_gexp: test ( gen_for | (',' test)* [','] ) - argument: [test '='] test [gen_for] # Really [keyword '='] test */ - expr_ty elt; - asdl_seq *genexps; - int i, n_fors; - node *ch; + /* testlist_gexp: test ( gen_for | (',' test)* [','] ) + argument: [test '='] test [gen_for] # Really [keyword '='] test */ + expr_ty elt; + asdl_seq *genexps; + int i, n_fors; + node *ch; + + assert(TYPE(n) == (testlist_gexp) || TYPE(n) == (argument)); + assert(NCH(n) > 1); + + elt = ast_for_expr(c, CHILD(n, 0)); + if (!elt) + return NULL; + + n_fors = count_gen_fors(n); + if (n_fors == -1) + return NULL; + + genexps = asdl_seq_new(n_fors); + if (!genexps) { + free_expr(elt); + return NULL; + } + + ch = CHILD(n, 1); + for (i = 0; i < n_fors; i++) { + comprehension_ty ge; + asdl_seq *t; + expr_ty expression; + + REQ(ch, gen_for); + + t = ast_for_exprlist(c, CHILD(ch, 1), Store); + if (!t) { + asdl_seq_free(genexps); + free_expr(elt); + return NULL; + } + expression = ast_for_testlist(c, CHILD(ch, 3), 1); + if (!expression) { + asdl_seq_free(genexps); + free_expr(elt); + return NULL; + } + + if 
(asdl_seq_LEN(t) == 1) + ge = comprehension(asdl_seq_GET(t, 0), expression, + NULL); + else + ge = comprehension(Tuple(t, Store, LINENO(ch)), + expression, NULL); + + if (!ge) { + asdl_seq_free(genexps); + free_expr(elt); + return NULL; + } + + if (NCH(ch) == 5) { + int j, n_ifs; + asdl_seq *ifs; + + ch = CHILD(ch, 4); + n_ifs = count_gen_ifs(ch); + if (n_ifs == -1) { + asdl_seq_free(genexps); + free_expr(elt); + return NULL; + } + + ifs = asdl_seq_new(n_ifs); + if (!ifs) { + asdl_seq_free(genexps); + free_expr(elt); + return NULL; + } + + for (j = 0; j < n_ifs; j++) { + REQ(ch, gen_iter); + ch = CHILD(ch, 0); + REQ(ch, gen_if); + + asdl_seq_APPEND(ifs, ast_for_expr(c, CHILD(ch, 1))); + if (NCH(ch) == 3) + ch = CHILD(ch, 2); + } + /* on exit, must guarantee that ch is a gen_for */ + if (TYPE(ch) == gen_iter) + ch = CHILD(ch, 0); + ge->ifs = ifs; + } + asdl_seq_APPEND(genexps, ge); + } + + return GeneratorExp(elt, genexps, LINENO(n)); +} + +static expr_ty +ast_for_atom(struct compiling *c, const node *n) +{ + /* atom: '(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' + | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+ + */ + node *ch = CHILD(n, 0); + + switch (TYPE(ch)) { + case NAME: + /* All names start in Load context, but may later be + changed. 
*/ + return Name(NEW_IDENTIFIER(ch), Load, LINENO(n)); + case STRING: { + PyObject *str = parsestrplus(c, n); - assert(TYPE(n) == (testlist_gexp) || TYPE(n) == (argument)); - assert(NCH(n) > 1); + if (!str) + return NULL; - elt = ast_for_expr(c, CHILD(n, 0)); - if (!elt) - return NULL; + return Str(str, LINENO(n)); + } + case NUMBER: { + PyObject *pynum = parsenumber(STR(ch)); - n_fors = count_gen_fors(n); - if (n_fors == -1) - return NULL; + if (!pynum) + return NULL; - genexps = asdl_seq_new(n_fors); - if (!genexps) { - /* XXX free(elt); */ + return Num(pynum, LINENO(n)); + } + case LPAR: /* some parenthesized expressions */ + ch = CHILD(n, 1); + + if (TYPE(ch) == RPAR) + return Tuple(NULL, Load, LINENO(n)); + + if (TYPE(ch) == yield_expr) + return ast_for_expr(c, ch); + + if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == gen_for)) + return ast_for_genexp(c, ch); + + return ast_for_testlist(c, ch, 1); + case LSQB: /* list (or list comprehension) */ + ch = CHILD(n, 1); + + if (TYPE(ch) == RSQB) + return List(NULL, Load, LINENO(n)); + + REQ(ch, listmaker); + if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { + asdl_seq *elts = seq_for_testlist(c, ch); + + if (!elts) return NULL; + + return List(elts, Load, LINENO(n)); } + else + return ast_for_listcomp(c, ch); + case LBRACE: { + /* dictmaker: test ':' test (',' test ':' test)* [','] */ + int i, size; + asdl_seq *keys, *values; ch = CHILD(n, 1); - for (i = 0; i < n_fors; i++) { - comprehension_ty ge; - asdl_seq *t; - expr_ty expression; - - REQ(ch, gen_for); - - t = ast_for_exprlist(c, CHILD(ch, 1), Store); - if (!t) { - asdl_seq_free(genexps); - /* XXX free(elt); */ - return NULL; - } - expression = ast_for_testlist(c, CHILD(ch, 3)); - if (!expression) { - asdl_seq_free(genexps); - /* XXX free(elt); */ - return NULL; - } - - if (asdl_seq_LEN(t) == 1) - ge = comprehension(asdl_seq_GET(t, 0), expression, - NULL); - else - ge = comprehension(Tuple(t, Store, LINENO(ch)), - expression, NULL); - - if (!ge) { - 
asdl_seq_free(genexps); - /* XXX free(elt); */ - return NULL; - } - - if (NCH(ch) == 5) { - int j, n_ifs; - asdl_seq *ifs; - - ch = CHILD(ch, 4); - n_ifs = count_gen_ifs(ch); - if (n_ifs == -1) { - asdl_seq_free(genexps); - /* XXX free(elt); */ - return NULL; - } - - ifs = asdl_seq_new(n_ifs); - if (!ifs) { - asdl_seq_free(genexps); - /* XXX free(elt); */ - return NULL; - } - - for (j = 0; j < n_ifs; j++) { - REQ(ch, gen_iter); - ch = CHILD(ch, 0); - REQ(ch, gen_if); - - asdl_seq_APPEND(ifs, - ast_for_expr(c, CHILD(ch, 1))); - if (NCH(ch) == 3) - ch = CHILD(ch, 2); - } - /* on exit, must guarantee that ch is a gen_for */ - if (TYPE(ch) == gen_iter) - ch = CHILD(ch, 0); - ge->ifs = ifs; - } - asdl_seq_APPEND(genexps, ge); + size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */ + keys = asdl_seq_new(size); + if (!keys) + return NULL; + + values = asdl_seq_new(size); + if (!values) { + asdl_seq_free(keys); + return NULL; } - return GeneratorExp(elt, genexps, LINENO(n)); -} - -static expr_ty -ast_for_atom(struct compiling *c, const node *n) -{ - /* XXX yield_expr */ - /* atom: '(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' - | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+ - */ - node *ch = CHILD(n, 0); - - switch (TYPE(ch)) { - case NAME: - /* All names start in Load context, but may later be - changed. 
*/ - return Name(NEW_IDENTIFIER(ch), Load, LINENO(n)); - case STRING: { - PyObject *str = parsestrplus(c, n); - - if (!str) - return NULL; - - return Str(str, LINENO(n)); - } - case NUMBER: { - PyObject *pynum = parsenumber(STR(ch)); - - if (!pynum) - return NULL; - - return Num(pynum, LINENO(n)); - } - case LPAR: /* some parenthesized expressions */ - ch = CHILD(n, 1); - - if (TYPE(ch) == RPAR) - return Tuple(NULL, Load, LINENO(n)); - - if (TYPE(ch) == yield_expr) - return ast_for_expr(c, ch); - - if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == gen_for)) - return ast_for_genexp(c, ch); - - return ast_for_testlist(c, ch); - case LSQB: /* list (or list comprehension) */ - ch = CHILD(n, 1); - - if (TYPE(ch) == RSQB) - return List(NULL, Load, LINENO(n)); - - REQ(ch, listmaker); - if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { - asdl_seq *elts = seq_for_testlist(c, ch); - - if (!elts) - return NULL; - - return List(elts, Load, LINENO(n)); - } - else - return ast_for_listcomp(c, ch); - case LBRACE: { - /* dictmaker: test ':' test (',' test ':' test)* [','] */ - int i, size; - asdl_seq *keys, *values; - - ch = CHILD(n, 1); - size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */ - keys = asdl_seq_new(size); - if (!keys) - return NULL; - - values = asdl_seq_new(size); - if (!values) { - asdl_seq_free(keys); - return NULL; - } - - for (i = 0; i < NCH(ch); i += 4) { - expr_ty expression; - - expression = ast_for_expr(c, CHILD(ch, i)); - if (!expression) - return NULL; - - asdl_seq_SET(keys, i / 4, expression); - - expression = ast_for_expr(c, CHILD(ch, i + 2)); - if (!expression) - return NULL; - - asdl_seq_SET(values, i / 4, expression); - } - return Dict(keys, values, LINENO(n)); - } - case BACKQUOTE: { /* repr */ - expr_ty expression = ast_for_testlist(c, CHILD(n, 1)); - - if (!expression) - return NULL; - - return Repr(expression, LINENO(n)); - } - default: - PyErr_Format(PyExc_Exception, "unhandled atom %d", - TYPE(ch)); - return NULL; + for (i = 0; i < NCH(ch); 
i += 4) { + expr_ty expression; + + expression = ast_for_expr(c, CHILD(ch, i)); + if (!expression) + return NULL; + + asdl_seq_SET(keys, i / 4, expression); + + expression = ast_for_expr(c, CHILD(ch, i + 2)); + if (!expression) + return NULL; + + asdl_seq_SET(values, i / 4, expression); } + return Dict(keys, values, LINENO(n)); + } + case BACKQUOTE: { /* repr */ + expr_ty expression = ast_for_testlist(c, CHILD(n, 1), 0); + + if (!expression) + return NULL; + + return Repr(expression, LINENO(n)); + } + default: + PyErr_Format(PyExc_Exception, "unhandled atom %d", + TYPE(ch)); + return NULL; + } } static slice_ty @@ -1544,7 +1552,7 @@ case yield_expr: { expr_ty exp = NULL; if (NCH(n) == 2) { - exp = ast_for_testlist(c, CHILD(n, 1)); + exp = ast_for_testlist(c, CHILD(n, 1), 0); if (!exp) return NULL; } @@ -1612,7 +1620,7 @@ new = Subscript(e, slc, Load, LINENO(ch)); if (!new) { free_expr(e); - /* XXX free(slc); */ + free_slice(slc); return NULL; } } @@ -1788,14 +1796,28 @@ return NULL; } +/* Unlike other ast_for_XXX() functions, this takes a flag that + indicates whether generator expressions are allowed. If gexp is + non-zero, check for testlist_gexp instead of plain testlist. +*/ + static expr_ty -ast_for_testlist(struct compiling *c, const node *n) +ast_for_testlist(struct compiling *c, const node* n, int gexp) { - /* n could be a testlist, a listmaker with no list_for, or - a testlist1 from inside backquotes. 
*/ + /* testlist_gexp: test ( gen_for | (',' test)* [','] ) + testlist: test (',' test)* [','] + */ + assert(NCH(n) > 0); if (NCH(n) == 1) return ast_for_expr(c, CHILD(n, 0)); + if (TYPE(CHILD(n, 1)) == gen_for) { + if (!gexp) { + ast_error(n, "illegal generator expression"); + return NULL; + } + return ast_for_genexp(c, n); + } else { asdl_seq *tmp = seq_for_testlist(c, n); if (!tmp) @@ -1803,14 +1825,15 @@ return Tuple(tmp, Load, LINENO(n)); } + return NULL; /* unreachable */ } static stmt_ty ast_for_expr_stmt(struct compiling *c, const node *n) { REQ(n, expr_stmt); - /* XXX yield_expr */ - /* expr_stmt: testlist (augassign testlist | ('=' testlist)*) + /* expr_stmt: testlist (augassign (yield_expr|testlist) + | ('=' (yield_expr|testlist))*) testlist: test (',' test)* [','] augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//=' @@ -1818,7 +1841,7 @@ */ if (NCH(n) == 1) { - expr_ty e = ast_for_testlist(c, CHILD(n, 0)); + expr_ty e = ast_for_testlist(c, CHILD(n, 0), 0); if (!e) return NULL; @@ -1830,7 +1853,7 @@ node *ch = CHILD(n, 0); if (TYPE(ch) == testlist) - expr1 = ast_for_testlist(c, ch); + expr1 = ast_for_testlist(c, ch, 0); else expr1 = Yield(ast_for_expr(c, CHILD(ch, 0)), LINENO(ch)); @@ -1851,7 +1874,7 @@ ch = CHILD(n, 2); if (TYPE(ch) == testlist) - expr2 = ast_for_testlist(c, ch); + expr2 = ast_for_testlist(c, ch, 0); else expr2 = Yield(ast_for_expr(c, ch), LINENO(ch)); if (!expr2) @@ -1880,7 +1903,7 @@ ast_error(ch, "assignment to yield expression not possible"); goto error; } - expr_ty e = ast_for_testlist(c, ch); + expr_ty e = ast_for_testlist(c, ch, 0); /* set context to assign */ if (!e) @@ -1895,7 +1918,7 @@ } value = CHILD(n, NCH(n) - 1); if (TYPE(value) == testlist) - expression = ast_for_testlist(c, value); + expression = ast_for_testlist(c, value, 0); else expression = ast_for_expr(c, value); if (!expression) @@ -2017,7 +2040,7 @@ if (NCH(ch) == 1) return Return(NULL, LINENO(n)); else { - expr_ty 
expression = ast_for_testlist(c, CHILD(ch, 1)); + expr_ty expression = ast_for_testlist(c, CHILD(ch, 1), 0); if (!expression) return NULL; return Return(expression, LINENO(n)); @@ -2202,7 +2225,7 @@ aliases = asdl_seq_new((n_children + 1) / 2); if (!aliases) { - free(mod); /* XXX proper way to free alias_ty structs? */ + free_alias(mod); return NULL; } @@ -2211,7 +2234,7 @@ alias_ty import_alias = alias_for_import_name(n); if (!import_alias) { asdl_seq_free(aliases); - free(mod); + free_alias(mod); return NULL; } asdl_seq_APPEND(aliases, import_alias); @@ -2221,13 +2244,14 @@ alias_ty import_alias = alias_for_import_name(CHILD(n, i)); if (!import_alias) { asdl_seq_free(aliases); - free(mod); + free_alias(mod); return NULL; } asdl_seq_APPEND(aliases, import_alias); } + Py_INCREF(mod->name); import = ImportFrom(mod->name, aliases, lineno); - free(mod); + free_alias(mod); return import; } PyErr_Format(PyExc_Exception, @@ -2573,7 +2597,7 @@ else target = Tuple(_target, Store, LINENO(n)); - expression = ast_for_testlist(c, CHILD(n, 3)); + expression = ast_for_testlist(c, CHILD(n, 3), 0); if (!expression) return NULL; suite_seq = ast_for_suite(c, CHILD(n, 5)); @@ -2724,7 +2748,7 @@ } /* else handle the base class list */ - _bases = ast_for_testlist(c, CHILD(n, 3)); + _bases = ast_for_testlist(c, CHILD(n, 3), 0); if (!_bases) return NULL; /* XXX: I don't think we can set to diff types here, how to free??? 
@@ -2739,6 +2763,7 @@ else { bases = asdl_seq_new(1); if (!bases) { + free_expr(_bases); /* XXX: free _bases */ return NULL; } From pje at users.sourceforge.net Tue Oct 18 06:08:49 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Tue, 18 Oct 2005 06:08:49 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools/setuptools/command easy_install.py, 1.33, 1.34 Message-ID: <20051018040849.9E0D71E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21360/setuptools/command Modified Files: easy_install.py Log Message: Hurray! Our first dependency processing bug! This is cool because it means that people are finally doing enough things with setuptools to have real-life version conflict scenarios. Luckily, the fix is trivial: use breadth-first instead of depth-first dependency processing, which I thought we were already doing anyway, but weren't. And we were giving precedence to already-installed packages, which means upgrades didn't work so well. 
Index: easy_install.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command/easy_install.py,v retrieving revision 1.33 retrieving revision 1.34 diff -u -d -r1.33 -r1.34 --- easy_install.py 16 Oct 2005 20:45:30 -0000 1.33 +++ easy_install.py 18 Oct 2005 04:08:46 -0000 1.34 @@ -394,7 +394,7 @@ return try: - WorkingSet(self.shadow_path).resolve( + WorkingSet([]).resolve( [requirement], self.local_index, self.easy_install ) except DistributionNotFound, e: From pje at users.sourceforge.net Tue Oct 18 06:08:49 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Tue, 18 Oct 2005 06:08:49 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools EasyInstall.txt, 1.65, 1.66 pkg_resources.py, 1.74, 1.75 pkg_resources.txt, 1.17, 1.18 Message-ID: <20051018040849.CB4B21E4002@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21360 Modified Files: EasyInstall.txt pkg_resources.py pkg_resources.txt Log Message: Hurray! Our first dependency processing bug! This is cool because it means that people are finally doing enough things with setuptools to have real-life version conflict scenarios. Luckily, the fix is trivial: use breadth-first instead of depth-first dependency processing, which I thought we were already doing anyway, but weren't. And we were giving precedence to already-installed packages, which means upgrades didn't work so well. 
Index: EasyInstall.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/EasyInstall.txt,v retrieving revision 1.65 retrieving revision 1.66 diff -u -d -r1.65 -r1.66 --- EasyInstall.txt 17 Oct 2005 02:26:39 -0000 1.65 +++ EasyInstall.txt 18 Oct 2005 04:08:44 -0000 1.66 @@ -855,6 +855,13 @@ * Improved Windows ``.exe`` script wrappers so that the script can have the same name as a module without confusing Python. + * Changed dependency processing so that it's breadth-first, allowing a + depender's preferences to override those of a dependee, to prevent conflicts + when a lower version is acceptable to the dependee, but not the depender. + Also, ensure that currently installed/selected packages aren't given + precedence over ones desired by a package being installed, which could + cause conflict errors. + 0.6a3 * Improved error message when trying to use old ways of running ``easy_install``. Removed the ability to run via ``python -m`` or by Index: pkg_resources.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.py,v retrieving revision 1.74 retrieving revision 1.75 diff -u -d -r1.74 -r1.75 --- pkg_resources.py 17 Oct 2005 02:26:39 -0000 1.74 +++ pkg_resources.py 18 Oct 2005 04:08:45 -0000 1.75 @@ -467,7 +467,7 @@ to_activate = [] while requirements: - req = requirements.pop() + req = requirements.pop(0) # process dependencies breadth-first if req in processed: # Ignore cyclic or redundant dependencies continue Index: pkg_resources.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.txt,v retrieving revision 1.17 retrieving revision 1.18 diff -u -d -r1.17 -r1.18 --- pkg_resources.txt 17 Oct 2005 02:26:39 -0000 1.17 +++ pkg_resources.txt 18 Oct 2005 04:08:45 -0000 1.18 @@ -1499,6 +1499,10 @@ non-namespace 
modules have already been imported and issues a warning if a conflicting module has already been imported. + * Changed dependency processing so that it's breadth-first, allowing a + depender's preferences to override those of a dependee, to prevent conflicts + when a lower version is acceptable to the dependee, but not the depender. + 0.6a4 * Fix a bug in ``WorkingSet.resolve()`` that was introduced in 0.6a3. From nnorwitz at users.sourceforge.net Tue Oct 18 07:07:52 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Tue, 18 Oct 2005 07:07:52 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libos.tex,1.169,1.170 Message-ID: <20051018050752.9BF0A1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31553/Doc/lib Modified Files: libos.tex Log Message: SF bug #1328915, try to word kill a bit more generically. Backport candidate. Index: libos.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libos.tex,v retrieving revision 1.169 retrieving revision 1.170 diff -u -d -r1.169 -r1.170 --- libos.tex 3 Oct 2005 05:47:38 -0000 1.169 +++ libos.tex 18 Oct 2005 05:07:49 -0000 1.170 @@ -1490,7 +1490,7 @@ \begin{funcdesc}{kill}{pid, sig} \index{process!killing} \index{process!signalling} -Kill the process \var{pid} with signal \var{sig}. Constants for the +Send signal \var{sig} to the process \var{pid}. Constants for the specific signals available on the host platform are defined in the \refmodule{signal} module. Availability: Macintosh, \UNIX. @@ -1499,7 +1499,7 @@ \begin{funcdesc}{killpg}{pgid, sig} \index{process!killing} \index{process!signalling} -Kill the process group \var{pgid} with the signal \var{sig}. +Send the signal \var{sig} to the process group \var{pgid}. Availability: Macintosh, \UNIX. 
\versionadded{2.3} \end{funcdesc} From jhylton at users.sourceforge.net Tue Oct 18 15:31:46 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Tue, 18 Oct 2005 15:31:46 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python newcompile.c, 1.1.2.123, 1.1.2.124 Message-ID: <20051018133146.6F31D1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12507/Python Modified Files: Tag: ast-branch newcompile.c Log Message: Remove comments and debugging code (mostly fprintfs) The initial comment block contained a list of open issues, which was mostly old. I think I've only removed that issues that are resolved. Index: newcompile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Attic/newcompile.c,v retrieving revision 1.1.2.123 retrieving revision 1.1.2.124 diff -u -d -r1.1.2.123 -r1.1.2.124 --- newcompile.c 17 Oct 2005 15:01:34 -0000 1.1.2.123 +++ newcompile.c 18 Oct 2005 13:31:36 -0000 1.1.2.124 @@ -26,42 +26,17 @@ int Py_OptimizeFlag = 0; -#if 1 -#define fprintf if (0) fprintf -#endif - /* - KNOWN BUGS: - - Seg Faults: - #: exec generally still has problems - #: do something about memory management! - - Inappropriate Exceptions: - #: Get this err msg: XXX rd_object called with exception set - From Python/marshal.c::PyMarshal_ReadLastObjectFromFile() - This looks like it may be related to encoding not being implemented. - #: These don't work right (from test_grammar): - def f4(two, (compound, (argument, list))): pass - def v3(a, (b, c), *rest): return a, b, c, rest + ISSUES: - Invalid behaviour: - #: Source encoding (via encoding_decl) is ignored. decode_unicode() - seems to be running into a memory management bug. 
- #: Ellipsis isn't handled properly - #: co_names doesn't contain locals, only globals, co_varnames may work - #: ref leaks in interpreter when press return on empty line - #: line numbers are off a bit (may just need to add calls to set lineno) - In some cases, the line numbers for generated code aren't strictly - increasing. This breaks the lnotab. + character encodings aren't handled - ISSUES: + ref leaks in interpreter when press return on empty line opcode_stack_effect() function should be reviewed since stack depth bugs could be really hard to find later. Dead code is being generated (i.e. after unconditional jumps). - */ #define DEFAULT_BLOCK_SIZE 16 @@ -211,155 +186,6 @@ static int expr_constant(expr_ty e); static PyCodeObject *assemble(struct compiler *, int addNone); - - -static char *opnames[] = { - "STOP_CODE", - "POP_TOP", - "ROT_TWO", - "ROT_THREE", - "DUP_TOP", - "ROT_FOUR", - "<6>", - "<7>", - "<8>", - "<9>", - "UNARY_POSITIVE", - "UNARY_NEGATIVE", - "UNARY_NOT", - "UNARY_CONVERT", - "<14>", - "UNARY_INVERT", - "<16>", - "<17>", - "<18>", - "BINARY_POWER", - "BINARY_MULTIPLY", - "BINARY_DIVIDE", - "BINARY_MODULO", - "BINARY_ADD", - "BINARY_SUBTRACT", - "BINARY_SUBSCR", - "BINARY_FLOOR_DIVIDE", - "BINARY_TRUE_DIVIDE", - "INPLACE_FLOOR_DIVIDE", - "INPLACE_TRUE_DIVIDE", - "SLICE+0", - "SLICE+1", - "SLICE+2", - "SLICE+3", - "<34>", - "<35>", - "<36>", - "<37>", - "<38>", - "<39>", - "STORE_SLICE+0", - "STORE_SLICE+1", - "STORE_SLICE+2", - "STORE_SLICE+3", - "<44>", - "<45>", - "<46>", - "<47>", - "<48>", - "<49>", - "DELETE_SLICE+0", - "DELETE_SLICE+1", - "DELETE_SLICE+2", - "DELETE_SLICE+3", - "<54>", - "INPLACE_ADD", - "INPLACE_SUBTRACT", - "INPLACE_MULTIPLY", - "INPLACE_DIVIDE", - "INPLACE_MODULO", - "STORE_SUBSCR", - "DELETE_SUBSCR", - "BINARY_LSHIFT", - "BINARY_RSHIFT", - "BINARY_AND", - "BINARY_XOR", - "BINARY_OR", - "INPLACE_POWER", - "GET_ITER", - "<69>", - "PRINT_EXPR", - "PRINT_ITEM", - "PRINT_NEWLINE", - "PRINT_ITEM_TO", - 
"PRINT_NEWLINE_TO", - "INPLACE_LSHIFT", - "INPLACE_RSHIFT", - "INPLACE_AND", - "INPLACE_XOR", - "INPLACE_OR", - "BREAK_LOOP", - "<81>", - "LOAD_LOCALS", - "RETURN_VALUE", - "IMPORT_STAR", - "EXEC_STMT", - "YIELD_VALUE", - "POP_BLOCK", - "END_FINALLY", - "BUILD_CLASS", - "STORE_NAME", - "DELETE_NAME", - "UNPACK_SEQUENCE", - "FOR_ITER", - "<94>", - "STORE_ATTR", - "DELETE_ATTR", - "STORE_GLOBAL", - "DELETE_GLOBAL", - "DUP_TOPX", - "LOAD_CONST", - "LOAD_NAME", - "BUILD_TUPLE", - "BUILD_LIST", - "BUILD_MAP", - "LOAD_ATTR", - "COMPARE_OP", - "IMPORT_NAME", - "IMPORT_FROM", - "<109>", - "JUMP_FORWARD", - "JUMP_IF_FALSE", - "JUMP_IF_TRUE", - "JUMP_ABSOLUTE", - "<114>", - "<115>", - "LOAD_GLOBAL", - "<117>", - "<118>", - "CONTINUE_LOOP", - "SETUP_LOOP", - "SETUP_EXCEPT", - "SETUP_FINALLY", - "<123>", - "LOAD_FAST", - "STORE_FAST", - "DELETE_FAST", - "<127>", - "<128>", - "<129>", - "RAISE_VARARGS", - "CALL_FUNCTION", - "MAKE_FUNCTION", - "BUILD_SLICE", - "MAKE_CLOSURE", - "LOAD_CLOSURE", - "LOAD_DEREF", - "STORE_DEREF", - "<138>", - "<139>", - "CALL_FUNCTION_VAR", - "CALL_FUNCTION_KW", - "CALL_FUNCTION_VAR_KW", - "EXTENDED_ARG", -}; - static PyObject *__doc__; PyObject * @@ -441,18 +267,6 @@ c.c_flags = flags; c.c_nestlevel = 0; - /* Trivial test of marshal code for now. 
*/ - { - PyObject *buf = PyString_FromStringAndSize(NULL, 1024); - int offset = 0; - assert(marshal_write_mod(&buf, &offset, mod)); - if (!_PyString_Resize(&buf, offset) < 0) { - fprintf(stderr, "resize failed!\n"); - goto error; - } - } - - fprintf(stderr, "ast %s\n", filename); c.c_st = PySymtable_Build(mod, filename, c.c_future); if (c.c_st == NULL) { if (!PyErr_Occurred()) @@ -460,14 +274,11 @@ goto error; } - fprintf(stderr, "symtable %s\n", filename); - /* XXX initialize to NULL for now, need to handle */ c.c_encoding = NULL; co = compiler_mod(&c, mod); - fprintf(stderr, "code %s\n", filename); error: compiler_free(&c); return co; @@ -1157,6 +968,10 @@ /* End: Peephole optimizations ----------------------------------------- */ +/* + +Leave this debugging code for just a little longer. + static void compiler_display_symbols(PyObject *name, PyObject *symbols) { @@ -1191,6 +1006,52 @@ } fprintf(stderr, "\n"); } +*/ + +static void +compiler_unit_check(struct compiler_unit *u) +{ + basicblock *block; + for (block = u->u_blocks; block != NULL; block = block->b_list) { + assert(block != (void *)0xcbcbcbcb); + assert(block != (void *)0xfbfbfbfb); + assert(block != (void *)0xdbdbdbdb); + if (block->b_instr != NULL) { + assert(block->b_ialloc > 0); + assert(block->b_iused > 0); + assert(block->b_ialloc >= block->b_iused); + } + else { + assert (block->b_iused == 0); + assert (block->b_ialloc == 0); + } + } +} + +static void +compiler_unit_free(struct compiler_unit *u) +{ + basicblock *b, *next; + + compiler_unit_check(u); + b = u->u_blocks; + while (b != NULL) { + if (b->b_instr) + PyObject_Free((void *)b->b_instr); + next = b->b_list; + PyObject_Free((void *)b); + b = next; + } + Py_XDECREF(u->u_ste); + Py_XDECREF(u->u_name); + Py_XDECREF(u->u_consts); + Py_XDECREF(u->u_names); + Py_XDECREF(u->u_varnames); + Py_XDECREF(u->u_freevars); + Py_XDECREF(u->u_cellvars); + Py_XDECREF(u->u_private); + PyObject_Free(u); +} static int compiler_enter_scope(struct compiler *c, 
identifier name, void *key, @@ -1199,10 +1060,11 @@ struct compiler_unit *u; u = PyObject_Malloc(sizeof(struct compiler_unit)); + memset(u, 0, sizeof(struct compiler_unit)); u->u_argcount = 0; u->u_ste = PySymtable_Lookup(c->c_st, key); if (!u->u_ste) { - PyObject_Free(u); + compiler_unit_free(u); return 0; } Py_INCREF(name); @@ -1220,32 +1082,25 @@ u->u_lineno_set = false; u->u_consts = PyDict_New(); if (!u->u_consts) { - /* XXX: free_u->u_ste); */ - PyObject_Free(u); + compiler_unit_free(u); return 0; } u->u_names = PyDict_New(); if (!u->u_names) { - /* XXX: free_u->u_ste); */ - PyObject_Free(u); + compiler_unit_free(u); return 0; } u->u_private = NULL; - /* A little debugging output */ - compiler_display_symbols(name, u->u_ste->ste_symbols); - /* Push the old compiler_unit on the stack. */ if (c->u) { PyObject *wrapper = PyCObject_FromVoidPtr(c->u, NULL); if (PyList_Append(c->c_stack, wrapper) < 0) { - /* XXX: free_u->u_ste); */ - PyObject_Free(u); + compiler_unit_free(u); return 0; } Py_DECREF(wrapper); - fprintf(stderr, "stack = %s\n", PyObject_REPR(c->c_stack)); u->u_private = c->u->u_private; Py_XINCREF(u->u_private); } @@ -1258,51 +1113,6 @@ return 1; } -static void -compiler_unit_check(struct compiler_unit *u) -{ - basicblock *block; - for (block = u->u_blocks; block != NULL; block = block->b_list) { - assert(block != (void *)0xcbcbcbcb); - assert(block != (void *)0xfbfbfbfb); - assert(block != (void *)0xdbdbdbdb); - if (block->b_instr != NULL) { - assert(block->b_ialloc > 0); - assert(block->b_iused > 0); - assert(block->b_ialloc >= block->b_iused); - } - else { - assert (block->b_iused == 0); - assert (block->b_ialloc == 0); - } - } -} - -static void -compiler_unit_free(struct compiler_unit *u) -{ - basicblock *b, *next; - - compiler_unit_check(u); - b = u->u_blocks; - while (b != NULL) { - if (b->b_instr) - PyObject_Free((void *)b->b_instr); - next = b->b_list; - PyObject_Free((void *)b); - b = next; - } - Py_XDECREF(u->u_ste); - Py_XDECREF(u->u_name); 
- Py_XDECREF(u->u_consts); - Py_XDECREF(u->u_names); - Py_XDECREF(u->u_varnames); - Py_XDECREF(u->u_freevars); - Py_XDECREF(u->u_cellvars); - Py_XDECREF(u->u_private); - PyObject_Free(u); -} - static int compiler_exit_scope(struct compiler *c) { @@ -2663,7 +2473,6 @@ { int i, n; - fprintf(stderr, "compile stmt %d lineno %d\n", s->kind, s->lineno); c->u->u_lineno = s->lineno; c->u->u_lineno_set = false; switch (s->kind) { @@ -2928,11 +2737,6 @@ break; } - fprintf(stderr, - "block=%s name=%s opt=%d scope=%d optype=%d\n", - PyString_AS_STRING(c->u->u_ste->ste_name), - PyString_AS_STRING(name), - c->u->u_ste->ste_unoptimized, scope, optype); /* XXX Leave assert here, but handle __doc__ and the like better */ assert(scope || PyString_AS_STRING(name)[0] == '_'); @@ -3378,7 +3182,6 @@ { int i, n; - fprintf(stderr, "compile expr %d lineno %d\n", e->kind, e->lineno); if (e->lineno > c->u->u_lineno) { c->u->u_lineno = e->lineno; c->u->u_lineno_set = false; @@ -3519,7 +3322,7 @@ assert(s->kind == AugAssign_kind); switch (e->kind) { - case Attribute_kind: + case Attribute_kind: auge = Attribute(e->v.Attribute.value, e->v.Attribute.attr, AugLoad, e->lineno); if (auge == NULL) @@ -3542,15 +3345,16 @@ auge->v.Subscript.ctx = AugStore; VISIT(c, expr, auge); free(auge); - break; + break; case Name_kind: VISIT(c, expr, s->v.AugAssign.target); VISIT(c, expr, s->v.AugAssign.value); ADDOP(c, inplace_binop(c, s->v.AugAssign.op)); return compiler_nameop(c, e->v.Name.id, Store); default: - fprintf(stderr, "invalid node type for augmented assignment\n"); - return 0; + fprintf(stderr, + "invalid node type for augmented assignment\n"); + return 0; } return 1; } @@ -3609,29 +3413,31 @@ static int compiler_handle_subscr(struct compiler *c, const char *kind, - expr_context_ty ctx) { - int op = 0; + expr_context_ty ctx) +{ + int op = 0; - /* XXX this code is duplicated */ - switch (ctx) { - case AugLoad: /* fall through to Load */ - case Load: op = BINARY_SUBSCR; break; - case AugStore:/* fall 
through to Store */ - case Store: op = STORE_SUBSCR; break; - case Del: op = DELETE_SUBSCR; break; - case Param: - fprintf(stderr, "invalid %s kind %d in compiler_visit_slice\n", - kind, ctx); - return 0; - } - if (ctx == AugLoad) { - ADDOP_I(c, DUP_TOPX, 2); - } - else if (ctx == AugStore) { - ADDOP(c, ROT_THREE); - } - ADDOP(c, op); - return 1; + /* XXX this code is duplicated */ + switch (ctx) { + case AugLoad: /* fall through to Load */ + case Load: op = BINARY_SUBSCR; break; + case AugStore:/* fall through to Store */ + case Store: op = STORE_SUBSCR; break; + case Del: op = DELETE_SUBSCR; break; + case Param: + fprintf(stderr, + "invalid %s kind %d in subscript\n", + kind, ctx); + return 0; + } + if (ctx == AugLoad) { + ADDOP_I(c, DUP_TOPX, 2); + } + else if (ctx == AugStore) { + ADDOP(c, ROT_THREE); + } + ADDOP(c, op); + return 1; } static int @@ -3807,15 +3613,11 @@ return maxdepth; b->b_seen = 1; b->b_startdepth = depth; - fprintf(stderr, "block %p\n", b); for (i = 0; i < b->b_iused; i++) { instr = &b->b_instr[i]; depth += opcode_stack_effect(instr->i_opcode, instr->i_oparg); if (depth > maxdepth) maxdepth = depth; - fprintf(stderr, " %14s %3d %3d (%d)\n", - opnames[instr->i_opcode], depth, maxdepth, - instr->i_lineno); assert(depth >= 0); /* invalid code or bug in stackdepth() */ if (instr->i_jrel || instr->i_jabs) { maxdepth = stackdepth_walk(c, instr->i_target, @@ -3901,25 +3703,6 @@ return size; } -/* Produce output that looks rather like dis module output. */ - -static void -assemble_display(struct assembler *a, struct instr *i) -{ - /* Dispatch the simple case first. */ - if (!i->i_hasarg) { - fprintf(stderr, "%5d %-20.20s %d\n", - a->a_offset, opnames[i->i_opcode], i->i_lineno); - return; - } - - fprintf(stderr, "%5d %-20.20s %3d %d", - a->a_offset, opnames[i->i_opcode], i->i_oparg, i->i_lineno); - if (i->i_jrel) - fprintf(stderr, " (to %d)", a->a_offset + i->i_oparg + 3); - fprintf(stderr, "\n"); -} - /* All about a_lnotab. 
c_lnotab is an array of unsigned bytes disguised as a Python string. @@ -4059,7 +3842,6 @@ int len = PyString_GET_SIZE(a->a_bytecode); char *code; - assemble_display(a, i); if (!i->i_hasarg) size = 1; else { @@ -4290,15 +4072,10 @@ /* Emit code in reverse postorder from dfs. */ for (i = a.a_nblocks - 1; i >= 0; i--) { basicblock *b = a.a_postorder[i]; - fprintf(stderr, - "\nblock %p: order=%d used=%d alloc=%d next=%p\n", - a.a_postorder[i], i, b->b_iused, b->b_ialloc, - b->b_next); for (j = 0; j < b->b_iused; j++) if (!assemble_emit(&a, &b->b_instr[j])) goto error; } - fprintf(stderr, "\n"); if (_PyString_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) goto error; @@ -4310,5 +4087,3 @@ assemble_free(&a); return co; } - - From jhylton at users.sourceforge.net Tue Oct 18 15:36:10 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Tue, 18 Oct 2005 15:36:10 +0200 (CEST) Subject: [Python-checkins] python/dist/src/PCbuild pythoncore.vcproj, 1.26.2.3, 1.26.2.4 Message-ID: <20051018133610.1F7BF1E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/PCbuild In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14036/PCbuild Modified Files: Tag: ast-branch pythoncore.vcproj Log Message: Replace compile.c with newcompile.c. 
Index: pythoncore.vcproj =================================================================== RCS file: /cvsroot/python/python/dist/src/PCbuild/pythoncore.vcproj,v retrieving revision 1.26.2.3 retrieving revision 1.26.2.4 diff -u -d -r1.26.2.3 -r1.26.2.4 --- pythoncore.vcproj 16 Oct 2005 05:24:05 -0000 1.26.2.3 +++ pythoncore.vcproj 18 Oct 2005 13:36:06 -0000 1.26.2.4 @@ -2291,7 +2291,7 @@ + RelativePath="..\Python\compile.c"> From jhylton at users.sourceforge.net Tue Oct 18 15:36:10 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Tue, 18 Oct 2005 15:36:10 +0200 (CEST) Subject: [Python-checkins] python/dist/src Makefile.pre.in, 1.86.2.7, 1.86.2.8 Message-ID: <20051018133610.1F12F1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14036 Modified Files: Tag: ast-branch Makefile.pre.in Log Message: Replace compile.c with newcompile.c. Index: Makefile.pre.in =================================================================== RCS file: /cvsroot/python/python/dist/src/Makefile.pre.in,v retrieving revision 1.86.2.7 retrieving revision 1.86.2.8 diff -u -d -r1.86.2.7 -r1.86.2.8 --- Makefile.pre.in 16 Oct 2005 05:23:54 -0000 1.86.2.7 +++ Makefile.pre.in 18 Oct 2005 13:36:06 -0000 1.86.2.8 @@ -235,7 +235,7 @@ Python/bltinmodule.o \ Python/exceptions.o \ Python/ceval.o \ - Python/newcompile.o \ + Python/compile.o \ Python/codecs.o \ Python/errors.o \ Python/frozen.o \ From jhylton at users.sourceforge.net Tue Oct 18 15:36:10 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Tue, 18 Oct 2005 15:36:10 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c, 2.247.2.4, 2.247.2.5 newcompile.c, 1.1.2.124, NONE Message-ID: <20051018133610.C7A3C1E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14036/Python Modified Files: Tag: ast-branch compile.c Removed Files: 
Tag: ast-branch newcompile.c Log Message: Replace compile.c with newcompile.c. Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.247.2.4 retrieving revision 2.247.2.5 diff -u -d -r2.247.2.4 -r2.247.2.5 --- compile.c 16 Oct 2005 05:24:05 -0000 2.247.2.4 +++ compile.c 18 Oct 2005 13:36:07 -0000 2.247.2.5 @@ -1,328 +1,379 @@ -/* Compile an expression node to intermediate code */ - -/* XXX TO DO: - XXX add __doc__ attribute == co_doc to code object attributes? - XXX (it's currently the first item of the co_const tuple) - XXX Generate simple jump for break/return outside 'try...finally' - XXX Allow 'continue' inside finally clause of try-finally - XXX New opcode for loading the initial index for a for loop - XXX other JAR tricks? -*/ +/* [...8429 lines suppressed...] + goto error; + + /* Emit code in reverse postorder from dfs. */ + for (i = a.a_nblocks - 1; i >= 0; i--) { + basicblock *b = a.a_postorder[i]; + for (j = 0; j < b->b_iused; j++) + if (!assemble_emit(&a, &b->b_instr[j])) + goto error; } + + if (_PyString_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) + goto error; + if (_PyString_Resize(&a.a_bytecode, a.a_offset) < 0) + goto error; + + co = makecode(c, &a); + error: + assemble_free(&a); + return co; } --- newcompile.c DELETED --- From martin.v.loewis at python.org Wed Oct 19 00:05:05 2005 From: martin.v.loewis at python.org (martin.v.loewis@python.org) Date: Wed, 19 Oct 2005 00:05:05 +0200 (CEST) Subject: [Python-checkins] commit of r41149 - python/trunk/Modules Message-ID: <20051018220505.C39FA1E4008@bag.python.org> Author: martin.v.loewis Date: Wed Oct 19 00:05:04 2005 New Revision: 41149 Modified: python/trunk/Modules/zlibmodule.c Log: Dummy checkin to test mailer. 
Modified: python/trunk/Modules/zlibmodule.c ============================================================================== --- python/trunk/Modules/zlibmodule.c (original) +++ python/trunk/Modules/zlibmodule.c Wed Oct 19 00:05:04 2005 @@ -1,7 +1,7 @@ /* zlibmodule.c -- gzip-compatible data compression */ /* See http://www.gzip.org/zlib/ */ -/* Windows users: read Python's PCbuild\readme.txt */ +/* Windows users: read Python's PCbuild/readme.txt */ #include "Python.h" From pje at users.sourceforge.net Wed Oct 19 05:00:37 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Wed, 19 Oct 2005 05:00:37 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools EasyInstall.txt, 1.66, 1.67 Message-ID: <20051019030037.8999E1E401F@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17238 Modified Files: EasyInstall.txt Log Message: Added "--allow-hosts" option to restrict downloading and spidering to a specified list of server glob patterns. Index: EasyInstall.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/EasyInstall.txt,v retrieving revision 1.66 retrieving revision 1.67 diff -u -d -r1.66 -r1.67 --- EasyInstall.txt 18 Oct 2005 04:08:44 -0000 1.66 +++ EasyInstall.txt 19 Oct 2005 03:00:30 -0000 1.67 @@ -692,6 +692,26 @@ tools that wrap eggs in a platform-specific packaging system. (We don't recommend that you use it for anything else.) +``--allow-hosts=PATTERNS, -H PATTERNS`` (New in 0.6a6) + Restrict downloading and spidering to hosts matching the specified glob + patterns. E.g. ``-H *.python.org`` restricts web access so that only + packages listed on and downloadable from machines in the ``python.org`` + domain are used. The glob patterns must match the *entire* user/host/port section of + the target URL(s).
For example, ``*.python.org`` will NOT accept a URL + like ``http://python.org/foo`` or ``http://www.python.org:8080/``. + Multiple patterns can be specified by separating them with commas. The + default pattern is ``*``, which matches anything. + + In general, this option is mainly useful for blocking EasyInstall's web + access altogether (e.g. ``-Hlocalhost``), or to restrict it to an intranet + or other trusted site. EasyInstall will do the best it can to satisfy + dependencies given your host restrictions, but of course can fail if it + can't find suitable packages. EasyInstall displays all blocked URLs, so + that you can adjust your ``--allow-hosts`` setting if it is more strict + than you intended. Some sites may wish to define a restrictive default + setting for this option in their `configuration files`_, and then manually + override the setting on the command line as needed. + Non-Root Installation --------------------- @@ -850,7 +870,7 @@ that makes the PYTHONPATH-based approach work with .pth files, so that you can get the full EasyInstall feature set on such installations. - * Added ``--no-deps`` option. + * Added ``--no-deps`` and ``--allow-hosts`` options. * Improved Windows ``.exe`` script wrappers so that the script can have the same name as a module without confusing Python. From pje at users.sourceforge.net Wed Oct 19 05:00:38 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Wed, 19 Oct 2005 05:00:38 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools/setuptools/command easy_install.py, 1.34, 1.35 Message-ID: <20051019030038.E1B811E4018@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17238/setuptools/command Modified Files: easy_install.py Log Message: Added "--allow-hosts" option to restrict downloading and spidering to a specified list of server glob patterns. 
Index: easy_install.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/command/easy_install.py,v retrieving revision 1.34 retrieving revision 1.35 diff -u -d -r1.34 -r1.35 --- easy_install.py 18 Oct 2005 04:08:46 -0000 1.34 +++ easy_install.py 19 Oct 2005 03:00:35 -0000 1.35 @@ -43,7 +43,6 @@ """Manage a download/build/install process""" description = "Find/get/install Python packages" - command_consumes_arguments = True user_options = [ @@ -71,6 +70,7 @@ ('site-dirs=','S',"list of directories where .pth files work"), ('editable', 'e', "Install specified packages in editable form"), ('no-deps', 'N', "don't install dependencies"), + ('allow-hosts=', 'H', "pattern(s) that hostnames must match"), ] boolean_options = [ 'zip-ok', 'multi-version', 'exclude-scripts', 'upgrade', 'always-copy', @@ -89,7 +89,7 @@ self.args = None self.optimize = self.record = None self.upgrade = self.always_copy = self.multi_version = None - self.editable = self.no_deps = None + self.editable = self.no_deps = self.allow_hosts = None self.root = None # Options not specifiable via command line @@ -177,9 +177,15 @@ for path_item in self.install_dir, normalize_path(self.script_dir): if path_item not in self.shadow_path: self.shadow_path.insert(0, path_item) + + if self.allow_hosts is not None: + hosts = [s.strip() for s in self.allow_hosts.split(',')] + else: + hosts = ['*'] + if self.package_index is None: self.package_index = self.create_index( - self.index_url, search_path = self.shadow_path + self.index_url, search_path = self.shadow_path, hosts=hosts ) self.local_index = Environment(self.shadow_path) @@ -202,7 +208,6 @@ "Can't use both --delete-conflicting and " "--ignore-conflicts-at-my-risk at the same time" ) - if self.editable and not self.build_directory: raise DistutilsArgError( "Must specify a build directory (-b) when using --editable" @@ -239,11 +244,6 @@ 
log.set_verbosity(self.distribution.verbose) - - - - - def install_egg_scripts(self, dist): """Write all the scripts for `dist`, unless scripts are excluded""" From pje at users.sourceforge.net Wed Oct 19 05:00:39 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Wed, 19 Oct 2005 05:00:39 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools/setuptools package_index.py, 1.21, 1.22 Message-ID: <20051019030039.184271E400D@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17238/setuptools Modified Files: package_index.py Log Message: Added "--allow-hosts" option to restrict downloading and spidering to a specified list of server glob patterns. Index: package_index.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools/package_index.py,v retrieving revision 1.21 retrieving revision 1.22 diff -u -d -r1.21 -r1.22 --- package_index.py 24 Sep 2005 19:44:27 -0000 1.21 +++ package_index.py 19 Oct 2005 03:00:33 -0000 1.22 @@ -5,11 +5,11 @@ from distutils import log from distutils.errors import DistutilsError from md5 import md5 +from fnmatch import translate EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$') HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I) # this is here to fix emacs' cruddy broken syntax highlighting - PYPI_MD5 = re.compile( '([^<]+)\n\s+\\(md5\\)' @@ -124,25 +124,25 @@ class PackageIndex(Environment): """A distribution index that scans web pages for download URLs""" - def __init__(self,index_url="http://www.python.org/pypi",*args,**kw): + def __init__(self,index_url="http://www.python.org/pypi",hosts=('*',),*args,**kw): Environment.__init__(self,*args,**kw) self.index_url = index_url + "/"[:not index_url.endswith('/')] self.scanned_urls = {} self.fetched_urls = {} self.package_pages = {} + self.allows = 
re.compile('|'.join(map(translate,hosts))).match def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" - if url in self.scanned_urls and not retrieve: return self.scanned_urls[url] = True - if not URL_SCHEME(url): # process filenames or directories if os.path.isfile(url): - dists = list(distros_for_filename(url)) + map(self.add, distros_for_filename(url)) + return # no need to retrieve anything elif os.path.isdir(url): url = os.path.realpath(url) for item in os.listdir(url): @@ -153,13 +153,16 @@ return else: dists = list(distros_for_url(url)) + if dists: + if not self.url_ok(url): + return + self.debug("Found link: %s", url) - if dists: - self.debug("Found link: %s", url) if dists or not retrieve or url in self.fetched_urls: - for dist in dists: - self.add(dist) - # don't need the actual page + map(self.add, dists) + return # don't need the actual page + + if not self.url_ok(url): return self.info("Reading %s", url) @@ -181,17 +184,14 @@ self.process_url(link) - - - - - - - - - - - + def url_ok(self, url, fatal=False): + if self.allows(urlparse.urlparse(url)[1]): + return True + msg = "\nLink to % s ***BLOCKED*** by --allow-hosts\n" + if fatal: + raise DistutilsError(msg % url) + else: + self.warn(msg, url) @@ -368,8 +368,8 @@ dl_blocksize = 8192 - def _download_to(self, url, filename): + self.url_ok(url,True) # raises error if not allowed self.info("Downloading %s", url) # Download the file fp, tfp, info = None, None, None From kitty at buddies.brastart.com Wed Oct 19 06:18:38 2005 From: kitty at buddies.brastart.com (kitty@buddies.brastart.com) Date: Wed, 19 Oct 2005 12:18:38 +0800 Subject: [Python-checkins] Buddies ?J web site , ???h?i????~~~ Message-ID: <20051019121838.BE4CD1594013@xccnotebook> Buddies J web site , nhga ~~~ ڻUuYi ~~~ www.buddies.brastart.com Oo d M 벼 ~~~ pGW link o N www.buddies.t35.com -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://mail.python.org/pipermail/python-checkins/attachments/20051019/e1b7ec35/attachment.html From nnorwitz at users.sourceforge.net Wed Oct 19 08:31:40 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Wed, 19 Oct 2005 08:31:40 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c, 2.247.2.5, 2.247.2.6 Message-ID: <20051019063140.946A11E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16668/Python Modified Files: Tag: ast-branch compile.c Log Message: Fix memory leaks in error conditions Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.247.2.5 retrieving revision 2.247.2.6 diff -u -d -r2.247.2.5 -r2.247.2.6 --- compile.c 18 Oct 2005 13:36:07 -0000 2.247.2.5 +++ compile.c 19 Oct 2005 06:31:37 -0000 2.247.2.6 @@ -1060,6 +1060,9 @@ struct compiler_unit *u; u = PyObject_Malloc(sizeof(struct compiler_unit)); + if (!u) + return 0; + memset(u, 0, sizeof(struct compiler_unit)); u->u_argcount = 0; u->u_ste = PySymtable_Lookup(c->c_st, key); @@ -1070,9 +1073,21 @@ Py_INCREF(name); u->u_name = name; u->u_varnames = list2dict(u->u_ste->ste_varnames); + if (!u->u_varnames) { + compiler_unit_free(u); + return 0; + } u->u_cellvars = dictbytype(u->u_ste->ste_symbols, CELL, 0, 0); + if (!u->u_cellvars) { + compiler_unit_free(u); + return 0; + } u->u_freevars = dictbytype(u->u_ste->ste_symbols, FREE, DEF_FREE_CLASS, PyDict_Size(u->u_cellvars)); + if (!u->u_freevars) { + compiler_unit_free(u); + return 0; + } u->u_blocks = NULL; u->u_tmpname = 0; From lemburg at users.sourceforge.net Thu Oct 20 00:33:35 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Thu, 20 Oct 2005 00:33:35 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects unicodeobject.c, 2.232, 2.233 Message-ID: 
<20051019223335.1E4271E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13762 Modified Files: unicodeobject.c Log Message: Bug fix for [ 1331062 ] utf 7 codec broken. Backport candidate. Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.232 retrieving revision 2.233 diff -u -d -r2.232 -r2.233 --- unicodeobject.c 6 Oct 2005 20:29:57 -0000 2.232 +++ unicodeobject.c 19 Oct 2005 22:33:31 -0000 2.233 @@ -843,15 +843,23 @@ }; +/* Note: The comparison (c) <= 0 is a trick to work-around gcc + warnings about the comparison always being false; since + utf7_special[0] is 1, we can safely make that one comparison + true */ + #define SPECIAL(c, encodeO, encodeWS) \ - (((c)>127 || utf7_special[(c)] == 1) || \ + ((c) > 127 || (c) <= 0 || utf7_special[(c)] == 1 || \ (encodeWS && (utf7_special[(c)] == 2)) || \ (encodeO && (utf7_special[(c)] == 3))) -#define B64(n) ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f]) -#define B64CHAR(c) (isalnum(c) || (c) == '+' || (c) == '/') -#define UB64(c) ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \ - (c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4) +#define B64(n) \ + ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f]) +#define B64CHAR(c) \ + (isalnum(c) || (c) == '+' || (c) == '/') +#define UB64(c) \ + ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \ + (c) - 71 : (c) >= 'A' ? 
(c) - 65 : (c) + 4 ) #define ENCODE(out, ch, bits) \ while (bits >= 6) { \ @@ -864,8 +872,8 @@ Py_UNICODE outCh = (Py_UNICODE) ((ch >> (bits-16)) & 0xffff); \ bits -= 16; \ if (surrogate) { \ - /* We have already generated an error for the high surrogate - so let's not bother seeing if the low surrogate is correct or not */\ + /* We have already generated an error for the high surrogate \ + so let's not bother seeing if the low surrogate is correct or not */ \ surrogate = 0; \ } else if (0xDC00 <= outCh && outCh <= 0xDFFF) { \ /* This is a surrogate pair. Unfortunately we can't represent \ @@ -876,7 +884,7 @@ } else { \ *out++ = outCh; \ } \ - } \ + } PyObject *PyUnicode_DecodeUTF7(const char *s, int size, From lemburg at users.sourceforge.net Thu Oct 20 00:39:06 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Thu, 20 Oct 2005 00:39:06 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects unicodeobject.c, 2.233, 2.234 Message-ID: <20051019223906.AEDF01E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14790 Modified Files: unicodeobject.c Log Message: Whitespace corrections. Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.233 retrieving revision 2.234 diff -u -d -r2.233 -r2.234 --- unicodeobject.c 19 Oct 2005 22:33:31 -0000 2.233 +++ unicodeobject.c 19 Oct 2005 22:39:02 -0000 2.234 @@ -850,7 +850,7 @@ #define SPECIAL(c, encodeO, encodeWS) \ ((c) > 127 || (c) <= 0 || utf7_special[(c)] == 1 || \ - (encodeWS && (utf7_special[(c)] == 2)) || \ + (encodeWS && (utf7_special[(c)] == 2)) || \ (encodeO && (utf7_special[(c)] == 3))) #define B64(n) \ @@ -861,29 +861,29 @@ ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \ (c) - 71 : (c) >= 'A' ? 
(c) - 65 : (c) + 4 ) -#define ENCODE(out, ch, bits) \ - while (bits >= 6) { \ - *out++ = B64(ch >> (bits-6)); \ - bits -= 6; \ +#define ENCODE(out, ch, bits) \ + while (bits >= 6) { \ + *out++ = B64(ch >> (bits-6)); \ + bits -= 6; \ } -#define DECODE(out, ch, bits, surrogate) \ - while (bits >= 16) { \ - Py_UNICODE outCh = (Py_UNICODE) ((ch >> (bits-16)) & 0xffff); \ - bits -= 16; \ - if (surrogate) { \ +#define DECODE(out, ch, bits, surrogate) \ + while (bits >= 16) { \ + Py_UNICODE outCh = (Py_UNICODE) ((ch >> (bits-16)) & 0xffff); \ + bits -= 16; \ + if (surrogate) { \ /* We have already generated an error for the high surrogate \ so let's not bother seeing if the low surrogate is correct or not */ \ - surrogate = 0; \ - } else if (0xDC00 <= outCh && outCh <= 0xDFFF) { \ + surrogate = 0; \ + } else if (0xDC00 <= outCh && outCh <= 0xDFFF) { \ /* This is a surrogate pair. Unfortunately we can't represent \ - it in a 16-bit character */ \ - surrogate = 1; \ - errmsg = "code pairs are not supported"; \ - goto utf7Error; \ - } else { \ - *out++ = outCh; \ - } \ + it in a 16-bit character */ \ + surrogate = 1; \ + errmsg = "code pairs are not supported"; \ + goto utf7Error; \ + } else { \ + *out++ = outCh; \ + } \ } PyObject *PyUnicode_DecodeUTF7(const char *s, From nnorwitz at users.sourceforge.net Thu Oct 20 06:15:56 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:15:56 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects stringobject.c, 2.233, 2.234 Message-ID: <20051020041556.17A711E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18773/Objects Modified Files: stringobject.c Log Message: SF bug #1331563 ] string_subscript doesn't check for failed PyMem_Malloc. 
Will backport Index: stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.233 retrieving revision 2.234 diff -u -d -r2.233 -r2.234 --- stringobject.c 1 Oct 2005 17:06:00 -0000 2.233 +++ stringobject.c 20 Oct 2005 04:15:52 -0000 2.234 @@ -1198,6 +1198,8 @@ else { source_buf = PyString_AsString((PyObject*)self); result_buf = PyMem_Malloc(slicelength); + if (result_buf == NULL) + return PyErr_NoMemory(); for (cur = start, i = 0; i < slicelength; cur += step, i++) { From nnorwitz at users.sourceforge.net Thu Oct 20 06:16:48 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:16:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects stringobject.c, 2.226, 2.226.2.1 Message-ID: <20051020041648.A43231E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18947/Objects Modified Files: Tag: release24-maint stringobject.c Log Message: Backport: SF bug #1331563 ] string_subscript doesn't check for failed PyMem_Malloc. 
Index: stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.226 retrieving revision 2.226.2.1 diff -u -d -r2.226 -r2.226.2.1 --- stringobject.c 26 Oct 2004 01:52:37 -0000 2.226 +++ stringobject.c 20 Oct 2005 04:16:45 -0000 2.226.2.1 @@ -1184,6 +1184,8 @@ else { source_buf = PyString_AsString((PyObject*)self); result_buf = PyMem_Malloc(slicelength); + if (result_buf == NULL) + return PyErr_NoMemory(); for (cur = start, i = 0; i < slicelength; cur += step, i++) { From nnorwitz at users.sourceforge.net Thu Oct 20 06:30:18 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:30:18 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules _bsddb.c,1.46,1.47 Message-ID: <20051020043018.138701E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20778/Modules Modified Files: _bsddb.c Log Message: Get bsddb module to compile with version 3.2 of BSD DB. 
Index: _bsddb.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_bsddb.c,v retrieving revision 1.46 retrieving revision 1.47 diff -u -d -r1.46 -r1.47 --- _bsddb.c 16 Jun 2005 19:01:42 -0000 1.46 +++ _bsddb.c 20 Oct 2005 04:30:15 -0000 1.47 @@ -1969,6 +1969,7 @@ RETURN_NONE(); } +#if (DBVER >= 33) static int _default_cmp (const DBT *leftKey, const DBT *rightKey) @@ -2120,6 +2121,7 @@ RETURN_IF_ERR (); RETURN_NONE (); } +#endif /* DBVER >= 33 */ static PyObject* @@ -3956,6 +3958,7 @@ } +#if (DBVER >= 33) static PyObject* DBEnv_set_lg_regionmax(DBEnvObject* self, PyObject* args) { @@ -3971,6 +3974,7 @@ RETURN_IF_ERR(); RETURN_NONE(); } +#endif static PyObject* @@ -4593,7 +4597,9 @@ {"remove", (PyCFunction)DB_remove, METH_VARARGS|METH_KEYWORDS}, {"rename", (PyCFunction)DB_rename, METH_VARARGS}, {"set_bt_minkey", (PyCFunction)DB_set_bt_minkey, METH_VARARGS}, +#if (DBVER >= 33) {"set_bt_compare", (PyCFunction)DB_set_bt_compare, METH_VARARGS}, +#endif {"set_cachesize", (PyCFunction)DB_set_cachesize, METH_VARARGS}, #if (DBVER >= 41) {"set_encrypt", (PyCFunction)DB_set_encrypt, METH_VARARGS|METH_KEYWORDS}, @@ -4683,7 +4689,9 @@ {"set_lg_bsize", (PyCFunction)DBEnv_set_lg_bsize, METH_VARARGS}, {"set_lg_dir", (PyCFunction)DBEnv_set_lg_dir, METH_VARARGS}, {"set_lg_max", (PyCFunction)DBEnv_set_lg_max, METH_VARARGS}, +#if (DBVER >= 33) {"set_lg_regionmax",(PyCFunction)DBEnv_set_lg_regionmax, METH_VARARGS}, +#endif {"set_lk_detect", (PyCFunction)DBEnv_set_lk_detect, METH_VARARGS}, {"set_lk_max", (PyCFunction)DBEnv_set_lk_max, METH_VARARGS}, #if (DBVER >= 32) From nnorwitz at users.sourceforge.net Thu Oct 20 06:36:11 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:36:11 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/bsddb __init__.py,1.16,1.17 Message-ID: <20051020043611.D01771E4003@bag.python.org> Update of 
/cvsroot/python/python/dist/src/Lib/bsddb In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21720/Lib/bsddb Modified Files: __init__.py Log Message: Get BSD DB working most for version 3.2 Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/bsddb/__init__.py,v retrieving revision 1.16 retrieving revision 1.17 diff -u -d -r1.16 -r1.17 --- __init__.py 3 Mar 2005 09:46:06 -0000 1.16 +++ __init__.py 20 Oct 2005 04:36:08 -0000 1.17 @@ -372,6 +372,8 @@ try: import thread del thread + if db.version() < (3, 3, 0): + db.DB_THREAD = 0 except ImportError: db.DB_THREAD = 0 From nnorwitz at users.sourceforge.net Thu Oct 20 06:37:01 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:37:01 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/bsddb __init__.py, 1.15.4.1, 1.15.4.2 Message-ID: <20051020043701.1EAF31E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/bsddb In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21822/Lib/bsddb Modified Files: Tag: release24-maint __init__.py Log Message: Get BSD DB working most for version 3.2 Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/bsddb/__init__.py,v retrieving revision 1.15.4.1 retrieving revision 1.15.4.2 diff -u -d -r1.15.4.1 -r1.15.4.2 --- __init__.py 3 Mar 2005 09:45:32 -0000 1.15.4.1 +++ __init__.py 20 Oct 2005 04:36:58 -0000 1.15.4.2 @@ -372,6 +372,8 @@ try: import thread del thread + if db.version() < (3, 3, 0): + db.DB_THREAD = 0 except ImportError: db.DB_THREAD = 0 From nnorwitz at users.sourceforge.net Thu Oct 20 06:50:16 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:50:16 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib tarfile.py,1.33,1.34 Message-ID: <20051020045016.70FB71E4003@bag.python.org> Update of 
/cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23342/Lib Modified Files: tarfile.py Log Message: Fix SF bug # 1330039, patch # 1331635 from Lars Gustaebel (tarfile maintainer) Problem: if two files are assigned the same inode number by the filesystem, the second one will be added as a hardlink to the first, which means that the content will be lost. The patched code checks if the file's st_nlink is greater 1. So only for files that actually have several links pointing to them hardlinks will be created, which is what GNU tar does. Will backport. Index: tarfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/tarfile.py,v retrieving revision 1.33 retrieving revision 1.34 diff -u -d -r1.33 -r1.34 --- tarfile.py 11 Sep 2005 16:34:05 -0000 1.33 +++ tarfile.py 20 Oct 2005 04:50:13 -0000 1.34 @@ -1150,7 +1150,8 @@ stmd = statres.st_mode if stat.S_ISREG(stmd): inode = (statres.st_ino, statres.st_dev) - if inode in self.inodes and not self.dereference: + if not self.dereference and \ + statres.st_nlink > 1 and inode in self.inodes: # Is it a hardlink to an already # archived file? type = LNKTYPE From nnorwitz at users.sourceforge.net Thu Oct 20 06:50:16 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:50:16 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_tarfile.py, 1.19, 1.20 Message-ID: <20051020045016.83FE01E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23342/Lib/test Modified Files: test_tarfile.py Log Message: Fix SF bug # 1330039, patch # 1331635 from Lars Gustaebel (tarfile maintainer) Problem: if two files are assigned the same inode number by the filesystem, the second one will be added as a hardlink to the first, which means that the content will be lost. 
The patched code checks if the file's st_nlink is greater 1. So only for files that actually have several links pointing to them hardlinks will be created, which is what GNU tar does. Will backport. Index: test_tarfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_tarfile.py,v retrieving revision 1.19 retrieving revision 1.20 diff -u -d -r1.19 -r1.20 --- test_tarfile.py 27 Aug 2005 10:07:56 -0000 1.19 +++ test_tarfile.py 20 Oct 2005 04:50:13 -0000 1.20 @@ -372,6 +372,53 @@ if e.errno == errno.ENOENT: self.fail("hardlink not extracted properly") +class CreateHardlinkTest(BaseTest): + """Test the creation of LNKTYPE (hardlink) members in an archive. + In this respect tarfile.py mimics the behaviour of GNU tar: If + a file has a st_nlink > 1, it will be added a REGTYPE member + only the first time. + """ + + def setUp(self): + self.tar = tarfile.open(tmpname(), "w") + + self.foo = os.path.join(dirname(), "foo") + self.bar = os.path.join(dirname(), "bar") + + if os.path.exists(self.foo): + os.remove(self.foo) + if os.path.exists(self.bar): + os.remove(self.bar) + + file(self.foo, "w").write("foo") + self.tar.add(self.foo) + + def test_add_twice(self): + # If st_nlink == 1 then the same file will be added as + # REGTYPE every time. + tarinfo = self.tar.gettarinfo(self.foo) + self.assertEqual(tarinfo.type, tarfile.REGTYPE, + "add file as regular failed") + + def test_add_hardlink(self): + # If st_nlink > 1 then the same file will be added as + # LNKTYPE. 
+ os.link(self.foo, self.bar) + tarinfo = self.tar.gettarinfo(self.foo) + self.assertEqual(tarinfo.type, tarfile.LNKTYPE, + "add file as hardlink failed") + + tarinfo = self.tar.gettarinfo(self.bar) + self.assertEqual(tarinfo.type, tarfile.LNKTYPE, + "add file as hardlink failed") + + def test_dereference_hardlink(self): + self.tar.dereference = True + os.link(self.foo, self.bar) + tarinfo = self.tar.gettarinfo(self.bar) + self.assertEqual(tarinfo.type, tarfile.REGTYPE, + "dereferencing hardlink failed") + # Gzip TestCases class ReadTestGzip(ReadTest): @@ -387,7 +434,6 @@ class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest): comp = "gz" - # Filemode test cases class FileModeTest(unittest.TestCase): @@ -440,6 +486,7 @@ if hasattr(os, "link"): tests.append(ExtractHardlinkTest) + tests.append(CreateHardlinkTest) if gzip: tests.extend([ From nnorwitz at users.sourceforge.net Thu Oct 20 06:56:12 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:56:12 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib tarfile.py, 1.21.2.5, 1.21.2.6 Message-ID: <20051020045612.156AD1E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25202/Lib Modified Files: Tag: release24-maint tarfile.py Log Message: Backport: Fix SF bug # 1330039, patch # 1331635 from Lars Gustaebel (tarfile maintainer) Problem: if two files are assigned the same inode number by the filesystem, the second one will be added as a hardlink to the first, which means that the content will be lost. The patched code checks if the file's st_nlink is greater 1. So only for files that actually have several links pointing to them hardlinks will be created, which is what GNU tar does. 
Index: tarfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/tarfile.py,v retrieving revision 1.21.2.5 retrieving revision 1.21.2.6 diff -u -d -r1.21.2.5 -r1.21.2.6 --- tarfile.py 27 Aug 2005 10:08:21 -0000 1.21.2.5 +++ tarfile.py 20 Oct 2005 04:56:09 -0000 1.21.2.6 @@ -1103,7 +1103,8 @@ stmd = statres.st_mode if stat.S_ISREG(stmd): inode = (statres.st_ino, statres.st_dev) - if inode in self.inodes and not self.dereference: + if not self.dereference and \ + statres.st_nlink > 1 and inode in self.inodes: # Is it a hardlink to an already # archived file? type = LNKTYPE From nnorwitz at users.sourceforge.net Thu Oct 20 06:56:12 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 06:56:12 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_tarfile.py, 1.16.2.1, 1.16.2.2 Message-ID: <20051020045612.4C4C71E4003@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25202/Lib/test Modified Files: Tag: release24-maint test_tarfile.py Log Message: Backport: Fix SF bug # 1330039, patch # 1331635 from Lars Gustaebel (tarfile maintainer) Problem: if two files are assigned the same inode number by the filesystem, the second one will be added as a hardlink to the first, which means that the content will be lost. The patched code checks if the file's st_nlink is greater 1. So only for files that actually have several links pointing to them hardlinks will be created, which is what GNU tar does. 
Index: test_tarfile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_tarfile.py,v retrieving revision 1.16.2.1 retrieving revision 1.16.2.2 diff -u -d -r1.16.2.1 -r1.16.2.2 --- test_tarfile.py 27 Aug 2005 10:08:21 -0000 1.16.2.1 +++ test_tarfile.py 20 Oct 2005 04:56:09 -0000 1.16.2.2 @@ -350,6 +350,53 @@ if e.errno == errno.ENOENT: self.fail("hardlink not extracted properly") +class CreateHardlinkTest(BaseTest): + """Test the creation of LNKTYPE (hardlink) members in an archive. + In this respect tarfile.py mimics the behaviour of GNU tar: If + a file has a st_nlink > 1, it will be added a REGTYPE member + only the first time. + """ + + def setUp(self): + self.tar = tarfile.open(tmpname(), "w") + + self.foo = os.path.join(dirname(), "foo") + self.bar = os.path.join(dirname(), "bar") + + if os.path.exists(self.foo): + os.remove(self.foo) + if os.path.exists(self.bar): + os.remove(self.bar) + + file(self.foo, "w").write("foo") + self.tar.add(self.foo) + + def test_add_twice(self): + # If st_nlink == 1 then the same file will be added as + # REGTYPE every time. + tarinfo = self.tar.gettarinfo(self.foo) + self.assertEqual(tarinfo.type, tarfile.REGTYPE, + "add file as regular failed") + + def test_add_hardlink(self): + # If st_nlink > 1 then the same file will be added as + # LNKTYPE. 
+ os.link(self.foo, self.bar) + tarinfo = self.tar.gettarinfo(self.foo) + self.assertEqual(tarinfo.type, tarfile.LNKTYPE, + "add file as hardlink failed") + + tarinfo = self.tar.gettarinfo(self.bar) + self.assertEqual(tarinfo.type, tarfile.LNKTYPE, + "add file as hardlink failed") + + def test_dereference_hardlink(self): + self.tar.dereference = True + os.link(self.foo, self.bar) + tarinfo = self.tar.gettarinfo(self.bar) + self.assertEqual(tarinfo.type, tarfile.REGTYPE, + "dereferencing hardlink failed") + # Gzip TestCases class ReadTestGzip(ReadTest): @@ -407,6 +454,7 @@ if hasattr(os, "link"): tests.append(ExtractHardlinkTest) + tests.append(CreateHardlinkTest) if gzip: tests.extend([ From nnorwitz at users.sourceforge.net Thu Oct 20 07:28:32 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Thu, 20 Oct 2005 07:28:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src setup.py,1.221,1.222 Message-ID: <20051020052832.98DA11E4005@bag.python.org> Update of /cvsroot/python/python/dist/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29964 Modified Files: setup.py Log Message: Find bsd db v4 on gentoo (2005 i think) Index: setup.py =================================================================== RCS file: /cvsroot/python/python/dist/src/setup.py,v retrieving revision 1.221 retrieving revision 1.222 diff -u -d -r1.221 -r1.222 --- setup.py 23 Aug 2005 21:19:40 -0000 1.221 +++ setup.py 20 Oct 2005 05:28:29 -0000 1.222 @@ -579,6 +579,7 @@ # 4.x minor number specific paths for x in (0,1,2,3): db_inc_paths.append('/usr/include/db4%d' % x) + db_inc_paths.append('/usr/include/db4.%d' % x) db_inc_paths.append('/usr/local/BerkeleyDB.4.%d/include' % x) db_inc_paths.append('/usr/local/include/db4%d' % x) db_inc_paths.append('/pkg/db-4.%d/include' % x) From jhylton at users.sourceforge.net Thu Oct 20 16:27:25 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 16:27:25 +0200 (CEST) 
Subject: [Python-checkins] python/dist/src/Lib/test test_trace.py, 1.14, 1.15 test_symtable.py, 1.5, 1.6 Message-ID: <20051020142725.8B1991E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16859/Lib/test Modified Files: test_trace.py test_symtable.py Log Message: Disable some tests in anticipation of merging ast-branch to the head Index: test_trace.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_trace.py,v retrieving revision 1.14 retrieving revision 1.15 diff -u -d -r1.14 -r1.15 --- test_trace.py 15 Aug 2005 18:14:18 -0000 1.14 +++ test_trace.py 20 Oct 2005 14:27:21 -0000 1.15 @@ -221,14 +221,16 @@ def test_01_basic(self): self.run_test(basic) - def test_02_arigo(self): - self.run_test(arigo_example) +## XXX: These tests fail with the new ast compiler. They must +## be fixed before a release. +## def test_02_arigo(self): +## self.run_test(arigo_example) def test_03_one_instr(self): self.run_test(one_instr_line) - def test_04_no_pop_blocks(self): - self.run_test(no_pop_blocks) - def test_05_no_pop_tops(self): - self.run_test(no_pop_tops) +## def test_04_no_pop_blocks(self): +## self.run_test(no_pop_blocks) +## def test_05_no_pop_tops(self): +## self.run_test(no_pop_tops) def test_06_call(self): self.run_test(call) def test_07_raise(self): @@ -238,8 +240,8 @@ self.run_test2(settrace_and_return) def test_09_settrace_and_raise(self): self.run_test2(settrace_and_raise) - def test_10_ireturn(self): - self.run_test(ireturn_example) +## def test_10_ireturn(self): +## self.run_test(ireturn_example) def test_11_tightloop(self): self.run_test(tightloop_example) def test_12_tighterloop(self): @@ -577,14 +579,17 @@ self.run_test(no_jump_too_far_forwards) def test_09_no_jump_too_far_backwards(self): self.run_test(no_jump_too_far_backwards) - def test_10_no_jump_to_except_1(self): - self.run_test(no_jump_to_except_1) - def 
test_11_no_jump_to_except_2(self): - self.run_test(no_jump_to_except_2) - def test_12_no_jump_to_except_3(self): - self.run_test(no_jump_to_except_3) - def test_13_no_jump_to_except_4(self): - self.run_test(no_jump_to_except_4) +# XXX: These tests cause the interpreter to crash. The frame_setlineno() +# function no longer works correctly because the lineno table generated by +# the AST compiler is slightly different than with the old compiler. +# def test_10_no_jump_to_except_1(self): +# self.run_test(no_jump_to_except_1) +# def test_11_no_jump_to_except_2(self): +# self.run_test(no_jump_to_except_2) +# def test_12_no_jump_to_except_3(self): +# self.run_test(no_jump_to_except_3) +# def test_13_no_jump_to_except_4(self): +# self.run_test(no_jump_to_except_4) def test_14_no_jump_forwards_into_block(self): self.run_test(no_jump_forwards_into_block) def test_15_no_jump_backwards_into_block(self): Index: test_symtable.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_symtable.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- test_symtable.py 15 Jul 2003 20:24:27 -0000 1.5 +++ test_symtable.py 20 Oct 2005 14:27:21 -0000 1.6 @@ -4,17 +4,20 @@ symbols = _symtable.symtable("def f(x): return x", "?", "exec") -vereq(symbols[0].name, "global") -vereq(len([ste for ste in symbols.values() if ste.name == "f"]), 1) +## XXX +## Test disabled because symtable module needs to be rewritten for new compiler -# Bug tickler: SyntaxError file name correct whether error raised -# while parsing or building symbol table. 
-def checkfilename(brokencode): - try: - _symtable.symtable(brokencode, "spam", "exec") - except SyntaxError, e: - vereq(e.filename, "spam") - else: - raise TestFailed("no SyntaxError for %r" % (brokencode,)) -checkfilename("def f(x): foo)(") # parse-time -checkfilename("def f(x): global x") # symtable-build-time +##vereq(symbols[0].name, "global") +##vereq(len([ste for ste in symbols.values() if ste.name == "f"]), 1) + +### Bug tickler: SyntaxError file name correct whether error raised +### while parsing or building symbol table. +##def checkfilename(brokencode): +## try: +## _symtable.symtable(brokencode, "spam", "exec") +## except SyntaxError, e: +## vereq(e.filename, "spam") +## else: +## raise TestFailed("no SyntaxError for %r" % (brokencode,)) +##checkfilename("def f(x): foo)(") # parse-time +##checkfilename("def f(x): global x") # symtable-build-time From fdrake at users.sourceforge.net Thu Oct 20 19:52:13 2005 From: fdrake at users.sourceforge.net (fdrake@users.sourceforge.net) Date: Thu, 20 Oct 2005 19:52:13 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libcodecs.tex,1.37,1.38 Message-ID: <20051020175213.63B321E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4553 Modified Files: libcodecs.tex Log Message: add missing word Index: libcodecs.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcodecs.tex,v retrieving revision 1.37 retrieving revision 1.38 diff -u -d -r1.37 -r1.38 --- libcodecs.tex 9 Oct 2005 19:41:20 -0000 1.37 +++ libcodecs.tex 20 Oct 2005 17:52:05 -0000 1.38 @@ -214,7 +214,7 @@ \subsection{Codec Base Classes} -The \module{codecs} defines a set of base classes which define the +The \module{codecs} module defines a set of base classes which define the interface and can also be used to easily write you own codecs for use in Python. 
From fdrake at users.sourceforge.net Thu Oct 20 19:53:04 2005 From: fdrake at users.sourceforge.net (fdrake@users.sourceforge.net) Date: Thu, 20 Oct 2005 19:53:04 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/lib libcodecs.tex, 1.34.2.2, 1.34.2.3 Message-ID: <20051020175304.4E7C81E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4699 Modified Files: Tag: release24-maint libcodecs.tex Log Message: add missing word (backported from trunk revision 1.38) Index: libcodecs.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcodecs.tex,v retrieving revision 1.34.2.2 retrieving revision 1.34.2.3 diff -u -d -r1.34.2.2 -r1.34.2.3 --- libcodecs.tex 24 Aug 2005 07:38:36 -0000 1.34.2.2 +++ libcodecs.tex 20 Oct 2005 17:53:01 -0000 1.34.2.3 @@ -214,7 +214,7 @@ \subsection{Codec Base Classes} -The \module{codecs} defines a set of base classes which define the +The \module{codecs} module defines a set of base classes which define the interface and can also be used to easily write you own codecs for use in Python. From lemburg at users.sourceforge.net Thu Oct 20 21:06:39 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:06:39 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects unicodectype.c, 2.16, 2.17 Message-ID: <20051020190639.2CB031E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21307 Modified Files: unicodectype.c Log Message: Enhance the performance of two important Unicode character type lookups: whitespace and linebreak. These lookup tables are from the Python 1.6 version with the addition of the 205F code point which was added as whitespace code point to Unicode since then. 
Index: unicodectype.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodectype.c,v retrieving revision 2.16 retrieving revision 2.17 diff -u -d -r2.16 -r2.17 --- unicodectype.c 4 Aug 2004 07:38:34 -0000 2.16 +++ unicodectype.c 20 Oct 2005 19:06:35 -0000 2.17 @@ -49,14 +49,24 @@ return &_PyUnicode_TypeRecords[index]; } -/* Returns 1 for Unicode characters having the category 'Zl' or type - 'B', 0 otherwise. */ +/* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or + type 'B', 0 otherwise. */ -int _PyUnicode_IsLinebreak(Py_UNICODE ch) +int _PyUnicode_IsLinebreak(register const Py_UNICODE ch) { - const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); - - return (ctype->flags & LINEBREAK_MASK) != 0; + switch (ch) { + case 0x000A: /* LINE FEED */ + case 0x000D: /* CARRIAGE RETURN */ + case 0x001C: /* FILE SEPARATOR */ + case 0x001D: /* GROUP SEPARATOR */ + case 0x001E: /* RECORD SEPARATOR */ + case 0x0085: /* NEXT LINE */ + case 0x2028: /* LINE SEPARATOR */ + case 0x2029: /* PARAGRAPH SEPARATOR */ + return 1; + default: + return 0; + } } /* Returns the titlecase Unicode characters corresponding to ch or just @@ -327,11 +337,43 @@ /* Returns 1 for Unicode characters having the bidirectional type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. 
*/ -int _PyUnicode_IsWhitespace(Py_UNICODE ch) +int _PyUnicode_IsWhitespace(register const Py_UNICODE ch) { - const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); - - return (ctype->flags & SPACE_MASK) != 0; + switch (ch) { + case 0x0009: /* HORIZONTAL TABULATION */ + case 0x000A: /* LINE FEED */ + case 0x000B: /* VERTICAL TABULATION */ + case 0x000C: /* FORM FEED */ + case 0x000D: /* CARRIAGE RETURN */ + case 0x001C: /* FILE SEPARATOR */ + case 0x001D: /* GROUP SEPARATOR */ + case 0x001E: /* RECORD SEPARATOR */ + case 0x001F: /* UNIT SEPARATOR */ + case 0x0020: /* SPACE */ + case 0x0085: /* NEXT LINE */ + case 0x00A0: /* NO-BREAK SPACE */ + case 0x1680: /* OGHAM SPACE MARK */ + case 0x2000: /* EN QUAD */ + case 0x2001: /* EM QUAD */ + case 0x2002: /* EN SPACE */ + case 0x2003: /* EM SPACE */ + case 0x2004: /* THREE-PER-EM SPACE */ + case 0x2005: /* FOUR-PER-EM SPACE */ + case 0x2006: /* SIX-PER-EM SPACE */ + case 0x2007: /* FIGURE SPACE */ + case 0x2008: /* PUNCTUATION SPACE */ + case 0x2009: /* THIN SPACE */ + case 0x200A: /* HAIR SPACE */ + case 0x200B: /* ZERO WIDTH SPACE */ + case 0x2028: /* LINE SEPARATOR */ + case 0x2029: /* PARAGRAPH SEPARATOR */ + case 0x202F: /* NARROW NO-BREAK SPACE */ + case 0x205F: /* MEDIUM MATHEMATICAL SPACE */ + case 0x3000: /* IDEOGRAPHIC SPACE */ + return 1; + default: + return 0; + } } /* Returns 1 for Unicode characters having the category 'Ll', 0 From jhylton at users.sourceforge.net Thu Oct 20 21:59:27 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:27 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/compiler pyassem.py, 1.31, 1.32 Message-ID: <20051020195927.7E00C1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/compiler In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Lib/compiler Modified Files: pyassem.py Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the 
parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. Index: pyassem.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/compiler/pyassem.py,v retrieving revision 1.31 retrieving revision 1.32 diff -u -d -r1.31 -r1.32 --- pyassem.py 31 Dec 2002 18:17:42 -0000 1.31 +++ pyassem.py 20 Oct 2005 19:59:24 -0000 1.32 @@ -364,16 +364,15 @@ def getCode(self): """Get a Python code object""" - if self.stage == RAW: - self.computeStackDepth() - self.flattenGraph() - if self.stage == FLAT: - self.convertArgs() - if self.stage == CONV: - self.makeByteCode() - if self.stage == DONE: - return self.newCodeObject() - raise RuntimeError, "inconsistent PyFlowGraph state" + assert self.stage == RAW + self.computeStackDepth() + self.flattenGraph() + assert self.stage == FLAT + self.convertArgs() + assert self.stage == CONV + self.makeByteCode() + assert self.stage == DONE + return self.newCodeObject() def dump(self, io=None): if io: From jhylton at users.sourceforge.net Thu Oct 20 21:59:27 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:27 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib cgitb.py, 1.17, 1.18 pydoc.py, 1.107, 1.108 Message-ID: <20051020195927.842AB1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Lib Modified Files: cgitb.py pydoc.py Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. 
Index: cgitb.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/cgitb.py,v retrieving revision 1.17 retrieving revision 1.18 diff -u -d -r1.17 -r1.18 --- cgitb.py 26 Jun 2005 21:57:55 -0000 1.17 +++ cgitb.py 20 Oct 2005 19:59:24 -0000 1.18 @@ -22,6 +22,7 @@ """ __author__ = 'Ka-Ping Yee' + __version__ = '$Revision$' import sys Index: pydoc.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/pydoc.py,v retrieving revision 1.107 retrieving revision 1.108 diff -u -d -r1.107 -r1.108 --- pydoc.py 1 Oct 2005 16:32:31 -0000 1.107 +++ pydoc.py 20 Oct 2005 19:59:24 -0000 1.108 @@ -36,6 +36,7 @@ __author__ = "Ka-Ping Yee " __date__ = "26 February 2001" + __version__ = "$Revision$" __credits__ = """Guido van Rossum, for an excellent programming language. Tommy Burnette, the original creator of manpy. From jhylton at users.sourceforge.net Thu Oct 20 21:59:27 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:27 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test/output test_grammar, 1.21, 1.22 test_profile, 1.5, 1.6 Message-ID: <20051020195927.8DE201E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test/output In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Lib/test/output Modified Files: test_grammar test_profile Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. 
Index: test_grammar =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/output/test_grammar,v retrieving revision 1.21 retrieving revision 1.22 diff -u -d -r1.21 -r1.22 --- test_grammar 31 Aug 2004 10:07:09 -0000 1.21 +++ test_grammar 20 Oct 2005 19:59:24 -0000 1.22 @@ -34,6 +34,7 @@ continue + try/finally ok testing continue and break in try/except in loop return_stmt +yield_stmt raise_stmt import_name import_from Index: test_profile =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/output/test_profile,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- test_profile 12 Jul 2004 23:38:02 -0000 1.5 +++ test_profile 20 Oct 2005 19:59:24 -0000 1.6 @@ -7,7 +7,7 @@ 12 0.000 0.000 0.012 0.001 :0(hasattr) 8 0.000 0.000 0.000 0.000 :0(range) 1 0.000 0.000 0.000 0.000 :0(setprofile) - 1 0.000 0.000 1.000 1.000 <string>:1(?) + 1 0.000 0.000 1.000 1.000 <string>:1(<module>) 0 0.000 0.000 profile:0(profiler) 1 0.000 0.000 1.000 1.000 profile:0(testfunc()) 1 0.400 0.400 1.000 1.000 test_profile.py:23(testfunc) From jhylton at users.sourceforge.net Thu Oct 20 21:59:27 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:27 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc ACKS,1.298,1.299 Message-ID: <20051020195927.9716B1E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Misc Modified Files: ACKS Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. 
Index: ACKS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/ACKS,v retrieving revision 1.298 retrieving revision 1.299 diff -u -d -r1.298 -r1.299 --- ACKS 3 Oct 2005 00:38:46 -0000 1.298 +++ ACKS 20 Oct 2005 19:59:24 -0000 1.299 @@ -165,6 +165,7 @@ Maxim Dzumanenko Hans Eckardt Grant Edwards +John Ehresman Andrew Eland Lance Ellinghaus David Ely From jhylton at users.sourceforge.net Thu Oct 20 21:59:27 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:27 +0200 (CEST) Subject: [Python-checkins] python/dist/src Makefile.pre.in,1.153,1.154 Message-ID: <20051020195927.AFAEE1E4008@bag.python.org> Update of /cvsroot/python/python/dist/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121 Modified Files: Makefile.pre.in Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. 
Index: Makefile.pre.in =================================================================== RCS file: /cvsroot/python/python/dist/src/Makefile.pre.in,v retrieving revision 1.153 retrieving revision 1.154 diff -u -d -r1.153 -r1.154 --- Makefile.pre.in 4 Oct 2005 04:32:42 -0000 1.153 +++ Makefile.pre.in 20 Oct 2005 19:59:24 -0000 1.154 @@ -216,10 +216,22 @@ PGENOBJS= $(PGENMAIN) $(POBJS) $(PGOBJS) +########################################################################## +# AST +AST_H= $(srcdir)/Include/Python-ast.h +AST_C= $(srcdir)/Python/Python-ast.c +AST_ASDL= $(srcdir)/Parser/Python.asdl + +ASDLGEN_FILES= $(srcdir)/Parser/asdl.py $(srcdir)/Parser/asdl_c.py +# XXX Note that a build now requires Python exist before the build starts +ASDLGEN= $(srcdir)/Parser/asdl_c.py -h $(srcdir)/Include -c $(srcdir)/Python ########################################################################## # Python PYTHON_OBJS= \ + Python/Python-ast.o \ + Python/asdl.o \ + Python/ast.o \ Python/bltinmodule.o \ Python/exceptions.o \ Python/ceval.o \ @@ -265,6 +277,7 @@ Objects/cellobject.o \ Objects/classobject.o \ Objects/cobject.o \ + Objects/codeobject.o \ Objects/complexobject.o \ Objects/descrobject.o \ Objects/enumobject.o \ @@ -457,8 +470,10 @@ Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c +$(AST_H) $(AST_C): $(AST_ASDL) $(ASDLGEN_FILES) + $(ASDLGEN) $(AST_ASDL) -Python/compile.o Python/symtable.o: $(GRAMMAR_H) +Python/compile.o Python/symtable.o: $(GRAMMAR_H) $(AST_H) Python/getplatform.o: $(srcdir)/Python/getplatform.c $(CC) -c $(PY_CFLAGS) -DPLATFORM='"$(MACHDEP)"' -o $@ $(srcdir)/Python/getplatform.c @@ -474,12 +489,15 @@ PYTHON_HEADERS= \ Include/Python.h \ + Include/Python-ast.h \ + Include/asdl.h \ Include/abstract.h \ Include/boolobject.h \ Include/bufferobject.h \ Include/ceval.h \ Include/classobject.h \ Include/cobject.h \ + Include/code.h \ Include/codecs.h \ Include/compile.h \ Include/complexobject.h \ From jhylton at users.sourceforge.net Thu Oct 20 21:59:27 
2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:27 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules _hotshot.c, 1.37, 1.38 symtablemodule.c, 1.8, 1.9 Message-ID: <20051020195927.B56E91E4009@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Modules Modified Files: _hotshot.c symtablemodule.c Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. Index: _hotshot.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_hotshot.c,v retrieving revision 1.37 retrieving revision 1.38 diff -u -d -r1.37 -r1.38 --- _hotshot.c 3 Aug 2004 08:33:55 -0000 1.37 +++ _hotshot.c 20 Oct 2005 19:59:24 -0000 1.38 @@ -3,6 +3,7 @@ */ #include "Python.h" +#include "code.h" #include "compile.h" #include "eval.h" #include "frameobject.h" Index: symtablemodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/symtablemodule.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -d -r1.8 -r1.9 --- symtablemodule.c 12 Oct 2003 19:09:37 -0000 1.8 +++ symtablemodule.c 20 Oct 2005 19:59:24 -0000 1.9 @@ -1,6 +1,8 @@ #include "Python.h" +#include "code.h" #include "compile.h" +#include "Python-ast.h" #include "symtable.h" static PyObject * @@ -64,9 +66,9 @@ PyModule_AddIntConstant(m, "DEF_IMPORT", DEF_IMPORT); PyModule_AddIntConstant(m, "DEF_BOUND", DEF_BOUND); - PyModule_AddIntConstant(m, "TYPE_FUNCTION", TYPE_FUNCTION); - PyModule_AddIntConstant(m, "TYPE_CLASS", TYPE_CLASS); - PyModule_AddIntConstant(m, "TYPE_MODULE", TYPE_MODULE); + 
PyModule_AddIntConstant(m, "TYPE_FUNCTION", FunctionBlock); + PyModule_AddIntConstant(m, "TYPE_CLASS", ClassBlock); + PyModule_AddIntConstant(m, "TYPE_MODULE", ModuleBlock); PyModule_AddIntConstant(m, "OPT_IMPORT_STAR", OPT_IMPORT_STAR); PyModule_AddIntConstant(m, "OPT_EXEC", OPT_EXEC); From jhylton at users.sourceforge.net Thu Oct 20 21:59:28 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_code.py, 1.1, 1.2 test_doctest.py, 1.54, 1.55 test_eof.py, 1.1, 1.2 test_generators.py, 1.48, 1.49 test_genexps.py, 1.8, 1.9 test_grammar.py, 1.52, 1.53 test_import.py, 1.19, 1.20 test_parser.py, 1.23, 1.24 test_repr.py, 1.19, 1.20 test_scope.py, 1.27, 1.28 Message-ID: <20051020195928.248781E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Lib/test Modified Files: test_doctest.py test_eof.py test_generators.py test_genexps.py test_grammar.py test_import.py test_parser.py test_repr.py test_scope.py Added Files: test_code.py Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. Index: test_doctest.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_doctest.py,v retrieving revision 1.54 retrieving revision 1.55 diff -u -d -r1.54 -r1.55 --- test_doctest.py 26 Jun 2005 23:09:51 -0000 1.54 +++ test_doctest.py 20 Oct 2005 19:59:24 -0000 1.55 @@ -1559,11 +1559,11 @@ >>> try: doctest.debug_src(s) ... 
finally: sys.stdin = real_stdin - > (1)?() + > (1)() (Pdb) next 12 --Return-- - > (1)?()->None + > (1)()->None (Pdb) print x 12 (Pdb) continue @@ -1601,7 +1601,7 @@ >>> try: runner.run(test) ... finally: sys.stdin = real_stdin --Return-- - > (1)?()->None + > (1)()->None -> import pdb; pdb.set_trace() (Pdb) print x 42 @@ -1637,7 +1637,7 @@ (Pdb) print y 2 (Pdb) up - > (1)?() + > (1)() -> calls_set_trace() (Pdb) print x 1 @@ -1686,7 +1686,7 @@ [EOF] (Pdb) next --Return-- - > (1)?()->None + > (1)()->None -> f(3) (Pdb) list 1 -> f(3) @@ -1779,7 +1779,7 @@ (Pdb) print y 1 (Pdb) up - > (1)?() + > (1)() -> calls_set_trace() (Pdb) print foo *** NameError: name 'foo' is not defined Index: test_eof.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_eof.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- test_eof.py 15 Aug 2002 01:28:54 -0000 1.1 +++ test_eof.py 20 Oct 2005 19:59:24 -0000 1.2 @@ -7,21 +7,21 @@ class EOFTestCase(unittest.TestCase): def test_EOFC(self): + expect = "EOL while scanning single-quoted string (, line 1)" try: eval("""'this is a test\ """) except SyntaxError, msg: - self.assertEqual(str(msg), - "EOL while scanning single-quoted string (line 1)") + self.assertEqual(str(msg), expect) else: raise test_support.TestFailed def test_EOFS(self): + expect = "EOF while scanning triple-quoted string (, line 1)" try: eval("""'''this is a test""") except SyntaxError, msg: - self.assertEqual(str(msg), - "EOF while scanning triple-quoted string (line 1)") + self.assertEqual(str(msg), expect) else: raise test_support.TestFailed Index: test_generators.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_generators.py,v retrieving revision 1.48 retrieving revision 1.49 diff -u -d -r1.48 -r1.49 --- test_generators.py 26 Aug 2005 15:20:48 -0000 1.48 +++ test_generators.py 20 Oct 2005 19:59:24 
-0000 1.49 @@ -774,7 +774,7 @@ ... try: ... 1//0 ... except ZeroDivisionError: -... yield 666 # bad because *outer* try has finally +... yield 666 ... except: ... pass ... finally: Index: test_genexps.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_genexps.py,v retrieving revision 1.8 retrieving revision 1.9 diff -u -d -r1.8 -r1.9 --- test_genexps.py 2 Aug 2005 00:46:43 -0000 1.8 +++ test_genexps.py 20 Oct 2005 19:59:24 -0000 1.9 @@ -125,13 +125,12 @@ >>> (y for y in (1,2)) = 10 Traceback (most recent call last): ... - SyntaxError: assign to generator expression not possible + SyntaxError: assignment to generator expression not possible (, line 1) >>> (y for y in (1,2)) += 10 Traceback (most recent call last): ... - SyntaxError: augmented assign to tuple literal, yield, or generator expression not possible - + SyntaxError: augmented assignment to generator expression not possible (, line 1) ########### Tests borrowed from or inspired by test_generators.py ############ Index: test_grammar.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_grammar.py,v retrieving revision 1.52 retrieving revision 1.53 diff -u -d -r1.52 -r1.53 --- test_grammar.py 9 Apr 2005 01:27:37 -0000 1.52 +++ test_grammar.py 20 Oct 2005 19:59:24 -0000 1.53 @@ -8,7 +8,7 @@ # regression test, the filterwarnings() call has been added to # regrtest.py. -from test.test_support import TestFailed, verify, check_syntax +from test.test_support import TestFailed, verify, vereq, check_syntax import sys print '1. 
Parser' @@ -157,28 +157,31 @@ def f3(two, arguments): pass def f4(two, (compound, (argument, list))): pass def f5((compound, first), two): pass -verify(f2.func_code.co_varnames == ('one_argument',)) -verify(f3.func_code.co_varnames == ('two', 'arguments')) +vereq(f2.func_code.co_varnames, ('one_argument',)) +vereq(f3.func_code.co_varnames, ('two', 'arguments')) if sys.platform.startswith('java'): - verify(f4.func_code.co_varnames == + vereq(f4.func_code.co_varnames, ('two', '(compound, (argument, list))', 'compound', 'argument', 'list',)) - verify(f5.func_code.co_varnames == + vereq(f5.func_code.co_varnames, ('(compound, first)', 'two', 'compound', 'first')) else: - verify(f4.func_code.co_varnames == ('two', '.2', 'compound', - 'argument', 'list')) - verify(f5.func_code.co_varnames == ('.0', 'two', 'compound', 'first')) + vereq(f4.func_code.co_varnames, + ('two', '.1', 'compound', 'argument', 'list')) + vereq(f5.func_code.co_varnames, + ('.0', 'two', 'compound', 'first')) def a1(one_arg,): pass def a2(two, args,): pass def v0(*rest): pass def v1(a, *rest): pass def v2(a, b, *rest): pass def v3(a, (b, c), *rest): return a, b, c, rest +# ceval unpacks the formal arguments into the first argcount names; +# thus, the names nested inside tuples must appear after these names. 
if sys.platform.startswith('java'): verify(v3.func_code.co_varnames == ('a', '(b, c)', 'rest', 'b', 'c')) else: - verify(v3.func_code.co_varnames == ('a', '.2', 'rest', 'b', 'c')) + vereq(v3.func_code.co_varnames, ('a', '.1', 'rest', 'b', 'c')) verify(v3(1, (2, 3), 4) == (1, 2, 3, (4,))) def d01(a=1): pass d01() @@ -410,6 +413,10 @@ def g2(): return 1 g1() x = g2() +check_syntax("class foo:return 1") + +print 'yield_stmt' +check_syntax("class foo:yield 1") print 'raise_stmt' # 'raise' test [',' test] try: raise RuntimeError, 'just testing' Index: test_import.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_import.py,v retrieving revision 1.19 retrieving revision 1.20 diff -u -d -r1.19 -r1.20 --- test_import.py 2 Aug 2004 03:58:27 -0000 1.19 +++ test_import.py 20 Oct 2005 19:59:24 -0000 1.20 @@ -192,3 +192,16 @@ del sys.modules[TESTFN] test_failing_reload() + +def test_import_name_binding(): + # import x.y.z binds x in the current namespace + import test as x + import test.test_support + assert x is test, x.__name__ + assert hasattr(test.test_support, "__file__") + + # import x.y.z as w binds z as w + import test.test_support as y + assert y is test.test_support, y.__name__ + +test_import_name_binding() Index: test_parser.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_parser.py,v retrieving revision 1.23 retrieving revision 1.24 diff -u -d -r1.23 -r1.24 --- test_parser.py 2 Aug 2005 00:46:43 -0000 1.23 +++ test_parser.py 20 Oct 2005 19:59:24 -0000 1.24 @@ -411,10 +411,32 @@ (0, '')) self.check_bad_tree(tree, "malformed global ast") + +class CompileTestCase(unittest.TestCase): + + # These tests are very minimal. 
:-( + + def test_compile_expr(self): + st = parser.expr('2 + 3') + code = parser.compilest(st) + self.assertEquals(eval(code), 5) + + def test_compile_suite(self): + st = parser.suite('x = 2; y = x + 3') + code = parser.compilest(st) + globs = {} + exec code in globs + self.assertEquals(globs['y'], 5) + + def test_compile_error(self): + st = parser.suite('1 = 3 + 4') + self.assertRaises(SyntaxError, parser.compilest, st) + def test_main(): test_support.run_unittest( RoundtripLegalSyntaxTestCase, - IllegalSyntaxTestCase + IllegalSyntaxTestCase, + CompileTestCase, ) Index: test_repr.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_repr.py,v retrieving revision 1.19 retrieving revision 1.20 diff -u -d -r1.19 -r1.20 --- test_repr.py 21 May 2004 23:01:18 -0000 1.19 +++ test_repr.py 20 Oct 2005 19:59:24 -0000 1.20 @@ -123,7 +123,7 @@ def test_lambda(self): self.failUnless(repr(lambda x: x).startswith( - "") @@ -13,8 +13,8 @@ inc = make_adder(1) plus10 = make_adder(10) -verify(inc(1) == 2) -verify(plus10(-2) == 8) +vereq(inc(1), 2) +vereq(plus10(-2), 8) print "2. extra nesting" @@ -28,8 +28,8 @@ inc = make_adder2(1) plus10 = make_adder2(10) -verify(inc(1) == 2) -verify(plus10(-2) == 8) +vereq(inc(1), 2) +vereq(plus10(-2), 8) print "3. simple nesting + rebinding" @@ -42,8 +42,8 @@ inc = make_adder3(0) plus10 = make_adder3(9) -verify(inc(1) == 2) -verify(plus10(-2) == 8) +vereq(inc(1), 2) +vereq(plus10(-2), 8) print "4. nesting with global but no free" @@ -58,10 +58,10 @@ global_x = 1 adder = make_adder4() -verify(adder(1) == 2) +vereq(adder(1), 2) global_x = 10 -verify(adder(-2) == 8) +vereq(adder(-2), 8) print "5. nesting through class" @@ -74,8 +74,8 @@ inc = make_adder5(1) plus10 = make_adder5(10) -verify(inc(1) == 2) -verify(plus10(-2) == 8) +vereq(inc(1), 2) +vereq(plus10(-2), 8) print "6. 
nesting plus free ref to global" @@ -89,8 +89,8 @@ inc = make_adder6(1) plus10 = make_adder6(10) -verify(inc(1) == 11) # there's only one global -verify(plus10(-2) == 8) +vereq(inc(1), 11) # there's only one global +vereq(plus10(-2), 8) print "7. nearest enclosing scope" @@ -103,7 +103,7 @@ return g(2) test_func = f(10) -verify(test_func(5) == 47) +vereq(test_func(5), 47) print "8. mixed freevars and cellvars" @@ -123,7 +123,7 @@ g = f(1, 2, 3) h = g(2, 4, 6) -verify(h() == 39) +vereq(h(), 39) print "9. free variable in method" @@ -141,9 +141,9 @@ return Test() t = test() -verify(t.test() == "var") -verify(t.method_and_var() == "method") -verify(t.actual_global() == "global") +vereq(t.test(), "var") +vereq(t.method_and_var(), "method") +vereq(t.actual_global(), "global") method_and_var = "var" class Test: @@ -158,9 +158,9 @@ return str(self) t = Test() -verify(t.test() == "var") -verify(t.method_and_var() == "method") -verify(t.actual_global() == "global") +vereq(t.test(), "var") +vereq(t.method_and_var(), "method") +vereq(t.actual_global(), "global") print "10. recursion" @@ -175,7 +175,7 @@ else: raise ValueError, "x must be >= 0" -verify(f(6) == 720) +vereq(f(6), 720) print "11. unoptimized namespaces" @@ -252,24 +252,24 @@ f1 = lambda x: lambda y: x + y inc = f1(1) plus10 = f1(10) -verify(inc(1) == 2) -verify(plus10(5) == 15) +vereq(inc(1), 2) +vereq(plus10(5), 15) f2 = lambda x: (lambda : lambda y: x + y)() inc = f2(1) plus10 = f2(10) -verify(inc(1) == 2) -verify(plus10(5) == 15) +vereq(inc(1), 2) +vereq(plus10(5), 15) f3 = lambda x: lambda y: global_x + y global_x = 1 inc = f3(None) -verify(inc(2) == 3) +vereq(inc(2), 3) f8 = lambda x, y, z: lambda a, b, c: lambda : z * (b + y) g = f8(1, 2, 3) h = g(2, 4, 6) -verify(h() == 18) +vereq(h(), 18) print "13. 
UnboundLocal" @@ -306,21 +306,21 @@ return lst return returner -verify(makeReturner(1,2,3)() == (1,2,3)) +vereq(makeReturner(1,2,3)(), (1,2,3)) def makeReturner2(**kwargs): def returner(): return kwargs return returner -verify(makeReturner2(a=11)()['a'] == 11) +vereq(makeReturner2(a=11)()['a'], 11) def makeAddPair((a, b)): def addPair((c, d)): return (a + c, b + d) return addPair -verify(makeAddPair((1, 2))((100, 200)) == (101,202)) +vereq(makeAddPair((1, 2))((100, 200)), (101,202)) print "15. scope of global statements" # Examples posted by Samuele Pedroni to python-dev on 3/1/2001 @@ -337,8 +337,8 @@ return h() return i() return g() -verify(f() == 7) -verify(x == 7) +vereq(f(), 7) +vereq(x, 7) # II x = 7 @@ -352,8 +352,8 @@ return h() return i() return g() -verify(f() == 2) -verify(x == 7) +vereq(f(), 2) +vereq(x, 7) # III x = 7 @@ -368,8 +368,8 @@ return h() return i() return g() -verify(f() == 2) -verify(x == 2) +vereq(f(), 2) +vereq(x, 2) # IV x = 7 @@ -384,8 +384,25 @@ return h() return i() return g() -verify(f() == 2) -verify(x == 2) +vereq(f(), 2) +vereq(x, 2) + +# XXX what about global statements in class blocks? +# do they affect methods? + +x = 12 +class Global: + global x + x = 13 + def set(self, val): + x = val + def get(self): + return x + +g = Global() +vereq(g.get(), 13) +g.set(15) +vereq(g.get(), 13) print "16. check leaks" @@ -407,7 +424,7 @@ for i in range(100): f1() -verify(Foo.count == 0) +vereq(Foo.count, 0) print "17. class and global" @@ -419,9 +436,9 @@ return Foo() x = 0 -verify(test(6)(2) == 8) +vereq(test(6)(2), 8) x = -1 -verify(test(3)(2) == 5) +vereq(test(3)(2), 5) print "18. verify that locals() works" @@ -437,7 +454,7 @@ d = f(2)(4) verify(d.has_key('h')) del d['h'] -verify(d == {'x': 2, 'y': 7, 'w': 6}) +vereq(d, {'x': 2, 'y': 7, 'w': 6}) print "19. var is bound and free in class" @@ -449,7 +466,7 @@ return C inst = f(3)() -verify(inst.a == inst.m()) +vereq(inst.a, inst.m()) print "20. 
interaction with trace function" From jhylton at users.sourceforge.net Thu Oct 20 21:59:28 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include Python-ast.h, 1.1, 2.1 asdl.h, 1.1, 2.1 ast.h, 1.1, 2.1 code.h, 1.1, 2.1 Python.h, 2.65, 2.66 compile.h, 2.41, 2.42 pyport.h, 2.71, 2.72 pythonrun.h, 2.67, 2.68 symtable.h, 2.13, 2.14 Message-ID: <20051020195928.2D95D1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Include Modified Files: Python.h compile.h pyport.h pythonrun.h symtable.h Added Files: Python-ast.h asdl.h ast.h code.h Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. Index: Python.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/Python.h,v retrieving revision 2.65 retrieving revision 2.66 diff -u -d -r2.65 -r2.66 --- Python.h 27 Jul 2004 15:57:23 -0000 2.65 +++ Python.h 20 Oct 2005 19:59:24 -0000 2.66 @@ -128,8 +128,7 @@ #include "pystrtod.h" /* _Py_Mangle is defined in compile.c */ -PyAPI_FUNC(int) _Py_Mangle(char *p, char *name, \ - char *buffer, size_t maxlen); +PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); /* PyArg_GetInt is deprecated and should not be used, use PyArg_Parse(). 
*/ #define PyArg_GetInt(v, a) PyArg_Parse((v), "i", (a)) Index: compile.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/compile.h,v retrieving revision 2.41 retrieving revision 2.42 diff -u -d -r2.41 -r2.42 --- compile.h 12 Feb 2004 15:28:26 -0000 2.41 +++ compile.h 20 Oct 2005 19:59:24 -0000 2.42 @@ -1,5 +1,6 @@ - -/* Definitions for bytecode */ +#ifndef Py_CODE_H +#include "code.h" +#endif #ifndef Py_COMPILE_H #define Py_COMPILE_H @@ -7,55 +8,6 @@ extern "C" { #endif -/* Bytecode object */ -typedef struct { - PyObject_HEAD - int co_argcount; /* #arguments, except *args */ - int co_nlocals; /* #local variables */ - int co_stacksize; /* #entries needed for evaluation stack */ - int co_flags; /* CO_..., see below */ - PyObject *co_code; /* instruction opcodes */ - PyObject *co_consts; /* list (constants used) */ - PyObject *co_names; /* list of strings (names used) */ - PyObject *co_varnames; /* tuple of strings (local variable names) */ - PyObject *co_freevars; /* tuple of strings (free variable names) */ - PyObject *co_cellvars; /* tuple of strings (cell variable names) */ - /* The rest doesn't count for hash/cmp */ - PyObject *co_filename; /* string (where it was loaded from) */ - PyObject *co_name; /* string (name, for reference) */ - int co_firstlineno; /* first source line number */ - PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */ -} PyCodeObject; - -/* Masks for co_flags above */ -#define CO_OPTIMIZED 0x0001 -#define CO_NEWLOCALS 0x0002 -#define CO_VARARGS 0x0004 -#define CO_VARKEYWORDS 0x0008 -#define CO_NESTED 0x0010 -#define CO_GENERATOR 0x0020 -/* The CO_NOFREE flag is set if there are no free or cell variables. - This information is redundant, but it allows a single flag test - to determine whether there is any extra work to be done when the - call frame it setup. -*/ -#define CO_NOFREE 0x0040 -/* XXX Temporary hack. 
Until generators are a permanent part of the - language, we need a way for a code object to record that generators - were *possible* when it was compiled. This is so code dynamically - compiled *by* a code object knows whether to allow yield stmts. In - effect, this passes on the "from __future__ import generators" state - in effect when the code block was compiled. */ -#define CO_GENERATOR_ALLOWED 0x1000 /* no longer used in an essential way */ -#define CO_FUTURE_DIVISION 0x2000 - -PyAPI_DATA(PyTypeObject) PyCode_Type; - -#define PyCode_Check(op) ((op)->ob_type == &PyCode_Type) -#define PyCode_GetNumFree(op) (PyTuple_GET_SIZE((op)->co_freevars)) - -#define CO_MAXBLOCKS 20 /* Max static block nesting within a function */ - /* Public interface */ struct _node; /* Declare the existence of this type */ PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *); @@ -68,19 +20,22 @@ /* Future feature support */ typedef struct { - int ff_found_docstring; - int ff_last_lineno; - int ff_features; + int ff_features; /* flags set by future statements */ + int ff_lineno; /* line number of last future statement */ } PyFutureFeatures; -PyAPI_FUNC(PyFutureFeatures *) PyNode_Future(struct _node *, const char *); -PyAPI_FUNC(PyCodeObject *) PyNode_CompileFlags(struct _node *, const char *, - PyCompilerFlags *); - #define FUTURE_NESTED_SCOPES "nested_scopes" #define FUTURE_GENERATORS "generators" #define FUTURE_DIVISION "division" +struct _mod; /* Declare the existence of this type */ +DL_IMPORT(PyCodeObject *) PyAST_Compile(struct _mod *, const char *, + PyCompilerFlags *); +DL_IMPORT(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *); + +#define ERR_LATE_FUTURE \ +"from __future__ imports must occur at the beginning of the file" + #ifdef __cplusplus } #endif Index: pyport.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pyport.h,v retrieving revision 2.71 retrieving revision 2.72 
diff -u -d -r2.71 -r2.72 --- pyport.h 14 Sep 2005 17:49:54 -0000 2.71 +++ pyport.h 20 Oct 2005 19:59:24 -0000 2.72 @@ -583,6 +583,7 @@ #ifndef INT_MAX #define INT_MAX 2147483647 +#define INT_MIN (-INT_MAX - 1) #endif #ifndef LONG_MAX Index: pythonrun.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pythonrun.h,v retrieving revision 2.67 retrieving revision 2.68 diff -u -d -r2.67 -r2.68 --- pythonrun.h 1 Aug 2005 21:39:27 -0000 2.67 +++ pythonrun.h 20 Oct 2005 19:59:24 -0000 2.68 @@ -29,46 +29,37 @@ PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void); PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *); -PyAPI_FUNC(int) PyRun_AnyFile(FILE *, const char *); -PyAPI_FUNC(int) PyRun_AnyFileEx(FILE *, const char *, int); - -PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_AnyFileExFlags(FILE *, const char *, int, PyCompilerFlags *); - -PyAPI_FUNC(int) PyRun_SimpleString(const char *); +PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, char *, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_AnyFileExFlags(FILE *, char *, int, PyCompilerFlags *); PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_SimpleFile(FILE *, const char *); -PyAPI_FUNC(int) PyRun_SimpleFileEx(FILE *, const char *, int); PyAPI_FUNC(int) PyRun_SimpleFileExFlags(FILE *, const char *, int, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_InteractiveOne(FILE *, const char *); PyAPI_FUNC(int) PyRun_InteractiveOneFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_InteractiveLoop(FILE *, const char *); PyAPI_FUNC(int) PyRun_InteractiveLoopFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(struct _node *) PyParser_SimpleParseString(const char *, int); -PyAPI_FUNC(struct _node *) PyParser_SimpleParseFile(FILE *, const char *, int); -PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlags(const char *, int, int); -PyAPI_FUNC(struct 
_node *) PyParser_SimpleParseStringFlagsFilename(const char *, - const char *, - int, - int); +PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, + int, PyCompilerFlags *flags); +PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, int, + char *, char *, + PyCompilerFlags *, int *); +#define PyParser_SimpleParseString(S, B) \ + PyParser_SimpleParseStringFlags(S, B, 0) +#define PyParser_SimpleParseFile(FP, S, B) \ + PyParser_SimpleParseFileFlags(FP, S, B, 0) +PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlags(const char *, int, + int); PyAPI_FUNC(struct _node *) PyParser_SimpleParseFileFlags(FILE *, const char *, int, int); -PyAPI_FUNC(PyObject *) PyRun_String(const char *, int, PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyRun_File(FILE *, const char *, int, PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyRun_FileEx(FILE *, const char *, int, - PyObject *, PyObject *, int); -PyAPI_FUNC(PyObject *) PyRun_StringFlags(const char *, int, PyObject *, PyObject *, - PyCompilerFlags *); -PyAPI_FUNC(PyObject *) PyRun_FileFlags(FILE *, const char *, int, PyObject *, - PyObject *, PyCompilerFlags *); -PyAPI_FUNC(PyObject *) PyRun_FileExFlags(FILE *, const char *, int, PyObject *, - PyObject *, int, PyCompilerFlags *); +PyAPI_FUNC(PyObject *) PyRun_StringFlags(const char *, int, PyObject *, + PyObject *, PyCompilerFlags *); -PyAPI_FUNC(PyObject *) Py_CompileString(const char *, const char *, int); +PyAPI_FUNC(PyObject *) PyRun_FileExFlags(FILE *, const char *, int, + PyObject *, PyObject *, int, + PyCompilerFlags *); + +#define Py_CompileString(str, p, s) Py_CompileStringFlags(str, p, s, NULL) PyAPI_FUNC(PyObject *) Py_CompileStringFlags(const char *, const char *, int, - PyCompilerFlags *); + PyCompilerFlags *); PyAPI_FUNC(struct symtable *) Py_SymtableString(const char *, const char *, int); PyAPI_FUNC(void) PyErr_Print(void); @@ -84,6 +75,25 @@ /* Bootstrap */ PyAPI_FUNC(int) Py_Main(int argc, char **argv); +/* Use 
macros for a bunch of old variants */ +#define PyRun_String(str, s, g, l) PyRun_StringFlags(str, s, g, l, NULL) +#define PyRun_AnyFile(fp, name) PyRun_AnyFileExFlags(fp, name, 0, NULL) +#define PyRun_AnyFileEx(fp, name, closeit) \ + PyRun_AnyFileExFlags(fp, name, closeit, NULL) +#define PyRun_AnyFileFlags(fp, name, flags) \ + PyRun_AnyFileExFlags(fp, name, 0, flags) +#define PyRun_SimpleString(s, f) PyRunSimpleStringFlags(s, f, NULL) +#define PyRun_SimpleFile(f, p) PyRun_SimpleFileExFlags(f, p, 0, NULL) +#define PyRun_SimpleFileEx(f, p, c) PyRun_SimpleFileExFlags(f, p, c, NULL) +#define PyRun_InteractiveOne(f, p) PyRun_InteractiveOneFlags(f, p, NULL) +#define PyRun_InteractiveLoop(f, p) PyRun_InteractiveLoopFlags(f, p, NULL) +#define PyRun_File(fp, p, s, g, l) \ + PyRun_FileExFlags(fp, p, s, g, l, 0, NULL) +#define PyRun_FileEx(fp, p, s, g, l, c) \ + PyRun_FileExFlags(fp, p, s, g, l, c, NULL) +#define PyRun_FileFlags(fp, p, s, g, l, flags) \ + PyRun_FileExFlags(fp, p, s, g, l, 0, flags) + /* In getpath.c */ PyAPI_FUNC(char *) Py_GetProgramFullPath(void); PyAPI_FUNC(char *) Py_GetPrefix(void); Index: symtable.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/symtable.h,v retrieving revision 2.13 retrieving revision 2.14 diff -u -d -r2.13 -r2.14 --- symtable.h 19 May 2004 08:20:05 -0000 2.13 +++ symtable.h 20 Oct 2005 19:59:24 -0000 2.14 @@ -4,64 +4,59 @@ extern "C" { #endif -/* A symbol table is constructed each time PyNode_Compile() is - called. The table walks the entire parse tree and identifies each - use or definition of a variable. - - The symbol table contains a dictionary for each code block in a - module: The symbol dictionary for the block. They keys of these - dictionaries are the name of all variables used or defined in the - block; the integer values are used to store several flags, - e.g. DEF_PARAM indicates that a variable is a parameter to a - function. 
-*/ +typedef enum _block_type { FunctionBlock, ClassBlock, ModuleBlock } + block_ty; struct _symtable_entry; struct symtable { - int st_pass; /* pass == 1 or 2 */ const char *st_filename; /* name of file being compiled */ struct _symtable_entry *st_cur; /* current symbol table entry */ + struct _symtable_entry *st_top; /* module entry */ PyObject *st_symbols; /* dictionary of symbol table entries */ PyObject *st_stack; /* stack of namespace info */ PyObject *st_global; /* borrowed ref to MODULE in st_symbols */ - int st_nscopes; /* number of scopes */ - int st_errors; /* number of errors */ + int st_nblocks; /* number of blocks */ char *st_private; /* name of current class or NULL */ + int st_tmpname; /* temporary name counter */ PyFutureFeatures *st_future; /* module's future features */ }; typedef struct _symtable_entry { PyObject_HEAD - PyObject *ste_id; /* int: key in st_symbols) */ - PyObject *ste_symbols; /* dict: name to flags) */ - PyObject *ste_name; /* string: name of scope */ + PyObject *ste_id; /* int: key in st_symbols */ + PyObject *ste_symbols; /* dict: name to flags */ + PyObject *ste_name; /* string: name of block */ PyObject *ste_varnames; /* list of variable names */ PyObject *ste_children; /* list of child ids */ - int ste_type; /* module, class, or function */ - int ste_lineno; /* first line of scope */ - int ste_optimized; /* true if namespace can't be optimized */ - int ste_nested; /* true if scope is nested */ - int ste_child_free; /* true if a child scope has free variables, + block_ty ste_type; /* module, class, or function */ + int ste_unoptimized; /* false if namespace is optimized */ + int ste_nested : 1; /* true if block is nested */ + int ste_free : 1; /* true if block has free variables */ + int ste_child_free : 1; /* true if a child block has free variables, including free refs to globals */ - int ste_generator; /* true if namespace is a generator */ + int ste_generator : 1; /* true if namespace is a generator */ + int ste_varargs : 
1; /* true if block has varargs */ + int ste_varkeywords : 1; /* true if block has varkeywords */ + int ste_lineno; /* first line of block */ int ste_opt_lineno; /* lineno of last exec or import * */ - int ste_tmpname; /* temporary name counter */ + int ste_tmpname; /* counter for listcomp temp vars */ struct symtable *ste_table; -} PySymtableEntryObject; - -PyAPI_DATA(PyTypeObject) PySymtableEntry_Type; +} PySTEntryObject; -#define PySymtableEntry_Check(op) ((op)->ob_type == &PySymtableEntry_Type) +PyAPI_DATA(PyTypeObject) PySTEntry_Type; -PyAPI_FUNC(PyObject *) PySymtableEntry_New(struct symtable *, - char *, int, int); +#define PySTEntry_Check(op) ((op)->ob_type == &PySTEntry_Type) -PyAPI_FUNC(struct symtable *) PyNode_CompileSymtable(struct _node *, const char *); -PyAPI_FUNC(void) PySymtable_Free(struct symtable *); +PyAPI_FUNC(PySTEntryObject *) \ + PySTEntry_New(struct symtable *, identifier, block_ty, void *, int); +PyAPI_FUNC(int) PyST_GetScope(PySTEntryObject *, PyObject *); +PyAPI_FUNC(struct symtable *) PySymtable_Build(mod_ty, const char *, + PyFutureFeatures *); +PyAPI_FUNC(PySTEntryObject *) PySymtable_Lookup(struct symtable *, void *); -#define TOP "global" +PyAPI_FUNC(void) PySymtable_Free(struct symtable *); /* Flags for def-use information */ @@ -72,16 +67,19 @@ #define DEF_STAR 2<<3 /* parameter is star arg */ #define DEF_DOUBLESTAR 2<<4 /* parameter is star-star arg */ #define DEF_INTUPLE 2<<5 /* name defined in tuple in parameters */ -#define DEF_FREE 2<<6 /* name used but not defined in nested scope */ +#define DEF_FREE 2<<6 /* name used but not defined in nested block */ #define DEF_FREE_GLOBAL 2<<7 /* free variable is actually implicit global */ #define DEF_FREE_CLASS 2<<8 /* free variable from class's method */ #define DEF_IMPORT 2<<9 /* assignment occurred via import */ #define DEF_BOUND (DEF_LOCAL | DEF_PARAM | DEF_IMPORT) -#define TYPE_FUNCTION 1 -#define TYPE_CLASS 2 -#define TYPE_MODULE 3 +/* GLOBAL_EXPLICIT and GLOBAL_IMPLICIT are 
used internally by the symbol + table. GLOBAL is returned from PyST_GetScope() for either of them. + It is stored in ste_symbols at bits 12-14. +*/ +#define SCOPE_OFF 11 +#define SCOPE_MASK 7 #define LOCAL 1 #define GLOBAL_EXPLICIT 2 @@ -89,9 +87,14 @@ #define FREE 4 #define CELL 5 +/* The following three names are used for the ste_unoptimized bit field */ #define OPT_IMPORT_STAR 1 #define OPT_EXEC 2 #define OPT_BARE_EXEC 4 +#define OPT_TOPLEVEL 8 /* top-level names, including eval and exec */ + +#define GENERATOR 1 +#define GENERATOR_EXPRESSION 2 #define GENERATOR 1 #define GENERATOR_EXPRESSION 2 From jhylton at users.sourceforge.net Thu Oct 20 21:59:28 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/compiler dumppyc.py, 1.2, 1.3 Message-ID: <20051020195928.359471E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Tools/compiler In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Tools/compiler Modified Files: dumppyc.py Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. 
Index: dumppyc.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/compiler/dumppyc.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- dumppyc.py 17 Sep 2001 18:08:20 -0000 1.2 +++ dumppyc.py 20 Oct 2005 19:59:25 -0000 1.3 @@ -28,7 +28,7 @@ if type(obj) == types.CodeType: walk(obj, match) -def main(filename, codename=None): +def load(filename, codename=None): co = loadCode(filename) walk(co, codename) @@ -39,6 +39,9 @@ else: filename = sys.argv[1] codename = None - if filename.endswith('.py') and os.path.exists(filename+"c"): - filename += "c" - main(filename, codename) + if filename.endswith('.py'): + buf = open(filename).read() + co = compile(buf, filename, "exec") + walk(co) + else: + load(filename, codename) From jhylton at users.sourceforge.net Thu Oct 20 21:59:28 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Parser Python.asdl, 1.1, 2.1 asdl.py, 1.1, 2.1 asdl_c.py, 1.1, 2.1 spark.py, 1.1, 2.1 .cvsignore, 2.1, 2.2 grammar.mak, 1.4, 1.5 parsetok.c, 2.37, 2.38 Message-ID: <20051020195928.4847E1E4007@bag.python.org> Update of /cvsroot/python/python/dist/src/Parser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Parser Modified Files: .cvsignore grammar.mak parsetok.c Added Files: Python.asdl asdl.py asdl_c.py spark.py Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. 
Index: .cvsignore =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/.cvsignore,v retrieving revision 2.1 retrieving revision 2.2 diff -u -d -r2.1 -r2.2 --- .cvsignore 2 May 2000 18:34:01 -0000 2.1 +++ .cvsignore 20 Oct 2005 19:59:24 -0000 2.2 @@ -1,3 +1,6 @@ Makefile pgen add2lib +asdl.pyc +asdl_c.pyc +spark.pyc Index: grammar.mak =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/grammar.mak,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- grammar.mak 17 Jun 2003 00:05:53 -0000 1.4 +++ grammar.mak 20 Oct 2005 19:59:24 -0000 1.5 @@ -15,7 +15,7 @@ # particular case --pragma in PC\pyconfig.h, which demands that # python23.lib get linked in). -LIBS= ..\PCbuild\python23.lib +LIBS= ..\PCbuild\python25.lib CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD Index: parsetok.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/parsetok.c,v retrieving revision 2.37 retrieving revision 2.38 diff -u -d -r2.37 -r2.38 --- parsetok.c 2 Oct 2005 01:48:51 -0000 2.37 +++ parsetok.c 20 Oct 2005 19:59:24 -0000 2.38 @@ -21,7 +21,7 @@ node * PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) { - return PyParser_ParseStringFlags(s, g, start, err_ret, 0); + return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); } node * @@ -56,7 +56,6 @@ return parsetok(tok, g, start, err_ret, flags); } - /* Parse input coming from a file. Return error code, print some errors. 
*/ node * @@ -210,7 +209,7 @@ } static void -initerr(perrdetail *err_ret, const char* filename) +initerr(perrdetail *err_ret, const char *filename) { err_ret->error = E_OK; err_ret->filename = filename; From jhylton at users.sourceforge.net Thu Oct 20 21:59:28 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects codeobject.c, 1.1, 2.1 frameobject.c, 2.79, 2.80 funcobject.c, 2.68, 2.69 typeobject.c, 2.269, 2.270 Message-ID: <20051020195928.4E35A1E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Objects Modified Files: frameobject.c funcobject.c typeobject.c Added Files: codeobject.c Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. 
Index: frameobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/frameobject.c,v retrieving revision 2.79 retrieving revision 2.80 diff -u -d -r2.79 -r2.80 --- frameobject.c 2 Jul 2004 06:41:06 -0000 2.79 +++ frameobject.c 20 Oct 2005 19:59:24 -0000 2.80 @@ -3,6 +3,7 @@ #include "Python.h" +#include "code.h" #include "compile.h" #include "frameobject.h" #include "opcode.h" Index: funcobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/funcobject.c,v retrieving revision 2.68 retrieving revision 2.69 diff -u -d -r2.68 -r2.69 --- funcobject.c 17 Feb 2005 10:37:21 -0000 2.68 +++ funcobject.c 20 Oct 2005 19:59:24 -0000 2.69 @@ -2,7 +2,7 @@ /* Function object implementation */ #include "Python.h" -#include "compile.h" +#include "code.h" #include "eval.h" #include "structmember.h" @@ -144,7 +144,9 @@ Py_XINCREF(closure); } else { - PyErr_SetString(PyExc_SystemError, "non-tuple closure"); + PyErr_Format(PyExc_SystemError, + "expected tuple for closure, got '%.100s'", + closure->ob_type->tp_name); return -1; } Py_XDECREF(((PyFunctionObject *) op) -> func_closure); Index: typeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/typeobject.c,v retrieving revision 2.269 retrieving revision 2.270 diff -u -d -r2.269 -r2.270 --- typeobject.c 24 Sep 2005 22:58:41 -0000 2.269 +++ typeobject.c 20 Oct 2005 19:59:24 -0000 2.270 @@ -1737,20 +1737,14 @@ goto bad_slots; for (i = j = 0; i < nslots; i++) { char *s; - char buffer[256]; tmp = PyTuple_GET_ITEM(slots, i); s = PyString_AS_STRING(tmp); if ((add_dict && strcmp(s, "__dict__") == 0) || (add_weak && strcmp(s, "__weakref__") == 0)) continue; - if (_Py_Mangle(PyString_AS_STRING(name), - PyString_AS_STRING(tmp), - buffer, sizeof(buffer))) - { - tmp = PyString_FromString(buffer); - } else { - Py_INCREF(tmp); - } + tmp 
=_Py_Mangle(name, tmp); + if (!tmp) + goto bad_slots; PyTuple_SET_ITEM(newslots, j, tmp); j++; } From jhylton at users.sourceforge.net Thu Oct 20 21:59:28 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/PCbuild pythoncore.vcproj, 1.29, 1.30 Message-ID: <20051020195928.6DBBB1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/PCbuild In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/PCbuild Modified Files: pythoncore.vcproj Log Message: Merge ast-branch to head This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. Index: pythoncore.vcproj =================================================================== RCS file: /cvsroot/python/python/dist/src/PCbuild/pythoncore.vcproj,v retrieving revision 1.29 retrieving revision 1.30 diff -u -d -r1.29 -r1.30 --- pythoncore.vcproj 24 Aug 2005 12:13:23 -0000 1.29 +++ pythoncore.vcproj 20 Oct 2005 19:59:24 -0000 1.30 @@ -3,6 +3,7 @@ ProjectType="Visual C++" Version="7.10" Name="pythoncore" + ProjectGUID="{7AFA1F0B-A8A1-455A-A832-BF263404BBEF}" SccProjectName="pythoncore" SccLocalPath=".."> @@ -478,6 +479,12 @@ + + + + @@ -802,10 +809,13 @@ + + + RelativePath="..\Objects\complexobject.c"> + RelativePath="..\PC\config.c"> + RelativePath="..\Modules\cPickle.c"> + RelativePath="..\Modules\cStringIO.c"> + RelativePath="..\Modules\datetimemodule.c"> + + - - + RelativePath="..\Objects\dictobject.c"> + RelativePath="..\PC\dl_nt.c"> + RelativePath="..\Python\dynload_win.c"> + RelativePath="..\Objects\enumobject.c"> + RelativePath="..\Modules\errnomodule.c"> + RelativePath="..\Python\errors.c"> + RelativePath="..\Python\exceptions.c"> + RelativePath="..\Objects\fileobject.c"> + 
RelativePath="..\Parser\firstsets.c"> + + + RelativePath="..\Objects\frameobject.c"> + RelativePath="..\Python\frozen.c"> + RelativePath="..\Objects\funcobject.c"> + RelativePath="..\Modules\functionalmodule.c"> + + @@ -2302,6 +2318,9 @@ + + @@ -2437,9 +2456,6 @@ - - @@ -2467,6 +2483,9 @@ + + @@ -2551,6 +2570,9 @@ RelativePath="..\Python\pystrtod.c"> + + @@ -2764,6 +2786,7 @@ @@ -2819,6 +2842,9 @@ +======= + RelativePath="..\Modules\sha256module.c"> +>>>>>>> 1.26.2.3 + RelativePath="..\Modules\sha512module.c"> + RelativePath="..\Modules\signalmodule.c"> + RelativePath="..\Objects\sliceobject.c"> + RelativePath="..\Objects\stringobject.c"> + RelativePath="..\Modules\stropmodule.c"> + RelativePath="..\Python\structmember.c"> + RelativePath="..\Modules\structmodule.c"> + RelativePath="..\Objects\structseq.c"> + + + + + + + + + + RelativePath="..\Python\symtable.c"> + RelativePath="..\Modules\symtablemodule.c"> + + + RelativePath="..\Python\thread.c"> + RelativePath="..\Modules\threadmodule.c"> + RelativePath="..\Modules\timemodule.c"> + + From jhylton at users.sourceforge.net Thu Oct 20 21:59:29 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Thu, 20 Oct 2005 21:59:29 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python Python-ast.c, 1.1, 2.1 asdl.c, 1.1, 2.1 ast.c, 1.1, 2.1 bltinmodule.c, 2.327, 2.328 ceval.c, 2.427, 2.428 compile.c, 2.352, 2.353 future.c, 2.15, 2.16 import.c, 2.245, 2.246 marshal.c, 1.88, 1.89 pythonrun.c, 2.217, 2.218 symtable.c, 2.12, 2.13 sysmodule.c, 2.130, 2.131 traceback.c, 2.42, 2.43 Message-ID: <20051020195929.8A7251E400D@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2121/Python Modified Files: bltinmodule.c ceval.c compile.c future.c import.c marshal.c pythonrun.c symtable.c sysmodule.c traceback.c Added Files: Python-ast.c asdl.c ast.c Log Message: Merge ast-branch to head This change implements a new bytecode compiler, 
based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests. Index: bltinmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/bltinmodule.c,v retrieving revision 2.327 retrieving revision 2.328 diff -u -d -r2.327 -r2.328 --- bltinmodule.c 24 Sep 2005 21:23:05 -0000 2.327 +++ bltinmodule.c 20 Oct 2005 19:59:25 -0000 2.328 @@ -1,9 +1,9 @@ - /* Built-in functions */ #include "Python.h" #include "node.h" +#include "code.h" #include "compile.h" #include "eval.h" Index: ceval.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ceval.c,v retrieving revision 2.427 retrieving revision 2.428 diff -u -d -r2.427 -r2.428 --- ceval.c 20 Sep 2005 18:34:01 -0000 2.427 +++ ceval.c 20 Oct 2005 19:59:25 -0000 2.428 @@ -8,7 +8,7 @@ #include "Python.h" -#include "compile.h" +#include "code.h" #include "frameobject.h" #include "eval.h" #include "opcode.h" @@ -543,7 +543,7 @@ #ifdef LLTRACE int lltrace; #endif -#if defined(Py_DEBUG) || defined(LLTRACE) +#if defined(Py_DEBUG) /* Make it easier to find out where we are with a debugger */ char *filename; #endif @@ -743,9 +743,9 @@ f->f_stacktop = NULL; /* remains NULL unless yield suspends frame */ #ifdef LLTRACE - lltrace = PyDict_GetItemString(f->f_globals,"__lltrace__") != NULL; + lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL; #endif -#if defined(Py_DEBUG) || defined(LLTRACE) +#if defined(Py_DEBUG) filename = PyString_AsString(co->co_filename); #endif @@ -2257,23 +2257,11 @@ case MAKE_CLOSURE: { - int nfree; v = POP(); /* code object */ x = PyFunction_New(v, f->f_globals); - nfree = PyCode_GetNumFree((PyCodeObject *)v); Py_DECREF(v); - /* XXX Maybe this should be a separate opcode? 
*/ - if (x != NULL && nfree > 0) { - v = PyTuple_New(nfree); - if (v == NULL) { - Py_DECREF(x); - x = NULL; - break; - } - while (--nfree >= 0) { - w = POP(); - PyTuple_SET_ITEM(v, nfree, w); - } + if (x != NULL) { + v = POP(); err = PyFunction_SetClosure(x, v); Py_DECREF(v); } @@ -2695,12 +2683,18 @@ if (co->co_flags & CO_VARKEYWORDS) nargs++; - /* Check for cells that shadow args */ - for (i = 0; i < f->f_ncells && j < nargs; ++i) { + /* Initialize each cell var, taking into account + cell vars that are initialized from arguments. + + Should arrange for the compiler to put cellvars + that are arguments at the beginning of the cellvars + list so that we can march over it more efficiently? + */ + for (i = 0; i < f->f_ncells; ++i) { cellname = PyString_AS_STRING( PyTuple_GET_ITEM(co->co_cellvars, i)); found = 0; - while (j < nargs) { + for (j = 0; j < nargs; j++) { argname = PyString_AS_STRING( PyTuple_GET_ITEM(co->co_varnames, j)); if (strcmp(cellname, argname) == 0) { @@ -2711,7 +2705,6 @@ found = 1; break; } - j++; } if (found == 0) { c = PyCell_New(NULL); @@ -2720,14 +2713,6 @@ SETLOCAL(f->f_nlocals + i, c); } } - /* Initialize any that are left */ - while (i < f->f_ncells) { - c = PyCell_New(NULL); - if (c == NULL) - goto fail; - SETLOCAL(f->f_nlocals + i, c); - i++; - } } if (f->f_nfreevars) { int i; Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.352 retrieving revision 2.353 diff -u -d -r2.352 -r2.353 --- compile.c 3 Aug 2005 18:33:05 -0000 2.352 +++ compile.c 20 Oct 2005 19:59:25 -0000 2.353 @@ -1,385 +1,379 @@ -/* Compile an expression node to intermediate code */ - -/* XXX TO DO: - XXX add __doc__ attribute == co_doc to code object attributes? 
- XXX (it's currently the first item of the co_const tuple) - XXX Generate simple jump for break/return outside 'try...finally' - XXX Allow 'continue' inside finally clause of try-finally - XXX New opcode for loading the initial index for a for loop - XXX other JAR tricks? -*/ +/* [...8965 lines suppressed...] - symtable_add_def(st, STR(n), DEF_LOCAL | def_flag); - return; - default: - if (NCH(n) == 0) - return; - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; - } - /* Should only occur for errors like x + 1 = 1, - which will be caught in the next pass. */ - for (i = 0; i < NCH(n); ++i) - if (TYPE(CHILD(n, i)) >= single_input) - symtable_assign(st, CHILD(n, i), def_flag); - } + co = makecode(c, &a); + error: + assemble_free(&a); + return co; } Index: future.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/future.c,v retrieving revision 2.15 retrieving revision 2.16 diff -u -d -r2.15 -r2.16 --- future.c 4 Feb 2005 18:38:43 -0000 2.15 +++ future.c 20 Oct 2005 19:59:25 -0000 2.16 @@ -1,37 +1,30 @@ #include "Python.h" +#include "Python-ast.h" #include "node.h" #include "token.h" #include "graminit.h" +#include "code.h" #include "compile.h" #include "symtable.h" #define UNDEFINED_FUTURE_FEATURE "future feature %.100s is not defined" #define FUTURE_IMPORT_STAR "future statement does not support import *" -/* FUTURE_POSSIBLE() is provided to accomodate doc strings, which is - the only statement that can occur before a future statement. 
-*/ -#define FUTURE_POSSIBLE(FF) ((FF)->ff_last_lineno == -1) - static int -future_check_features(PyFutureFeatures *ff, node *n, const char *filename) +future_check_features(PyFutureFeatures *ff, stmt_ty s, const char *filename) { int i; - char *feature; - node *ch, *nn; + const char *feature; + asdl_seq *names; - REQ(n, import_from); - nn = CHILD(n, 3 + (TYPE(CHILD(n, 3)) == LPAR)); - if (TYPE(nn) == STAR) { - PyErr_SetString(PyExc_SyntaxError, FUTURE_IMPORT_STAR); - PyErr_SyntaxLocation(filename, nn->n_lineno); - return -1; - } - REQ(nn, import_as_names); - for (i = 0; i < NCH(nn); i += 2) { - ch = CHILD(nn, i); - REQ(ch, import_as_name); - feature = STR(CHILD(ch, 0)); + assert(s->kind == ImportFrom_kind); + + names = s->v.ImportFrom.names; + for (i = 0; i < asdl_seq_LEN(names); i++) { + alias_ty name = asdl_seq_GET(names, i); + feature = PyString_AsString(name->name); + if (!feature) + return 0; if (strcmp(feature, FUTURE_NESTED_SCOPES) == 0) { continue; } else if (strcmp(feature, FUTURE_GENERATORS) == 0) { @@ -41,218 +34,97 @@ } else if (strcmp(feature, "braces") == 0) { PyErr_SetString(PyExc_SyntaxError, "not a chance"); - PyErr_SyntaxLocation(filename, CHILD(ch, 0)->n_lineno); - return -1; + PyErr_SyntaxLocation(filename, s->lineno); + return 0; } else { PyErr_Format(PyExc_SyntaxError, UNDEFINED_FUTURE_FEATURE, feature); - PyErr_SyntaxLocation(filename, CHILD(ch, 0)->n_lineno); - return -1; + PyErr_SyntaxLocation(filename, s->lineno); + return 0; } } - return 0; + return 1; } -static void -future_error(node *n, const char *filename) +int +future_parse(PyFutureFeatures *ff, mod_ty mod, const char *filename) { - PyErr_SetString(PyExc_SyntaxError, - "from __future__ imports must occur at the " - "beginning of the file"); - PyErr_SyntaxLocation(filename, n->n_lineno); -} - -/* Relevant portions of the grammar: + int i, found_docstring = 0, done = 0, prev_line = 0; -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: (NEWLINE | stmt)* 
ENDMARKER -stmt: simple_stmt | compound_stmt -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt - | import_stmt | global_stmt | exec_stmt | assert_stmt -import_stmt: 'import' dotted_as_name (',' dotted_as_name)* - | 'from' dotted_name 'import' ('*' | import_as_name (',' import_as_name)*) -import_as_name: NAME [NAME NAME] -dotted_as_name: dotted_name [NAME NAME] -dotted_name: NAME ('.' NAME)* -*/ + static PyObject *future; + if (!future) { + future = PyString_InternFromString("__future__"); + if (!future) + return 0; + } -/* future_parse() finds future statements at the beginnning of a - module. The function calls itself recursively, rather than - factoring out logic for different kinds of statements into - different routines. + if (!(mod->kind == Module_kind || mod->kind == Interactive_kind)) + return 1; - Return values: - -1 indicates an error occurred, e.g. unknown feature name - 0 indicates no feature was found - 1 indicates a feature was found -*/ + /* A subsequent pass will detect future imports that don't + appear at the beginning of the file. There's one case, + however, that is easier to handl here: A series of imports + joined by semi-colons, where the first import is a future + statement but some subsequent import has the future form + but is preceded by a regular import. + */ + -static int -future_parse(PyFutureFeatures *ff, node *n, const char *filename) -{ - int i, r; - loop: - switch (TYPE(n)) { + for (i = 0; i < asdl_seq_LEN(mod->v.Module.body); i++) { + stmt_ty s = asdl_seq_GET(mod->v.Module.body, i); - case single_input: - if (TYPE(CHILD(n, 0)) == simple_stmt) { - n = CHILD(n, 0); - goto loop; - } - return 0; + if (done && s->lineno > prev_line) + return 1; + prev_line = s->lineno; - case file_input: - /* Check each statement in the file, starting with the - first, and continuing until the first statement - that isn't a future statement. 
+ /* The tests below will return from this function unless it is + still possible to find a future statement. The only things + that can precede a future statement are another future + statement and a doc string. */ - for (i = 0; i < NCH(n); i++) { - node *ch = CHILD(n, i); - if (TYPE(ch) == stmt) { - r = future_parse(ff, ch, filename); - /* Need to check both conditions below - to accomodate doc strings, which - causes r < 0. - */ - if (r < 1 && !FUTURE_POSSIBLE(ff)) - return r; - } - } - return 0; - - case simple_stmt: - if (NCH(n) == 2) { - REQ(CHILD(n, 0), small_stmt); - n = CHILD(n, 0); - goto loop; - } else { - /* Deal with the special case of a series of - small statements on a single line. If a - future statement follows some other - statement, the SyntaxError is raised here. - In all other cases, the symtable pass - raises the exception. - */ - int found = 0, end_of_future = 0; - for (i = 0; i < NCH(n); i += 2) { - if (TYPE(CHILD(n, i)) == small_stmt) { - r = future_parse(ff, CHILD(n, i), - filename); - if (r < 1) - end_of_future = 1; - else { - found = 1; - if (end_of_future) { - future_error(n, - filename); - return -1; - } - } + if (s->kind == ImportFrom_kind) { + if (s->v.ImportFrom.module == future) { + if (done) { + PyErr_SetString(PyExc_SyntaxError, + ERR_LATE_FUTURE); + PyErr_SyntaxLocation(filename, + s->lineno); + return 0; } + if (!future_check_features(ff, s, filename)) + return 0; + ff->ff_lineno = s->lineno; } - - /* If we found one and only one, then the - current lineno is legal. 
- */ - if (found) - ff->ff_last_lineno = n->n_lineno + 1; else - ff->ff_last_lineno = n->n_lineno; - - if (end_of_future && found) - return 1; - else - return 0; - } - - case stmt: - if (TYPE(CHILD(n, 0)) == simple_stmt) { - n = CHILD(n, 0); - goto loop; - } else if (TYPE(CHILD(n, 0)) == expr_stmt) { - n = CHILD(n, 0); - goto loop; - } else { - REQ(CHILD(n, 0), compound_stmt); - ff->ff_last_lineno = n->n_lineno; - return 0; - } - - case small_stmt: - n = CHILD(n, 0); - goto loop; - - case import_stmt: { - node *name; - - n = CHILD(n, 0); - if (TYPE(n) != import_from) { - ff->ff_last_lineno = n->n_lineno; - return 0; - } - name = CHILD(n, 1); - if (strcmp(STR(CHILD(name, 0)), "__future__") != 0) - return 0; - if (future_check_features(ff, n, filename) < 0) - return -1; - ff->ff_last_lineno = n->n_lineno + 1; - return 1; - } - - /* The cases below -- all of them! -- are necessary to find - and skip doc strings. */ - case expr_stmt: - case testlist: - case test: - case and_test: - case not_test: - case comparison: - case expr: - case xor_expr: - case and_expr: - case shift_expr: - case arith_expr: - case term: - case factor: - case power: - if (NCH(n) == 1) { - n = CHILD(n, 0); - goto loop; + done = 1; } - ff->ff_last_lineno = n->n_lineno; - break; - - case atom: - if (TYPE(CHILD(n, 0)) == STRING - && ff->ff_found_docstring == 0) { - ff->ff_found_docstring = 1; - return 0; + else if (s->kind == Expr_kind && !found_docstring) { + expr_ty e = s->v.Expr.value; + if (e->kind != Str_kind) + done = 1; + else + found_docstring = 1; } - ff->ff_last_lineno = n->n_lineno; - return 0; - - default: - ff->ff_last_lineno = n->n_lineno; - return 0; + else + done = 1; } - return 0; + return 1; } + PyFutureFeatures * -PyNode_Future(node *n, const char *filename) +PyFuture_FromAST(mod_ty mod, const char *filename) { PyFutureFeatures *ff; ff = (PyFutureFeatures *)PyMem_Malloc(sizeof(PyFutureFeatures)); if (ff == NULL) return NULL; - ff->ff_found_docstring = 0; - ff->ff_last_lineno = -1; 
ff->ff_features = 0; + ff->ff_lineno = -1; - if (future_parse(ff, n, filename) < 0) { + if (!future_parse(ff, mod, filename)) { PyMem_Free((void *)ff); return NULL; } Index: import.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/import.c,v retrieving revision 2.245 retrieving revision 2.246 diff -u -d -r2.245 -r2.246 --- import.c 3 Oct 2005 04:48:15 -0000 2.245 +++ import.c 20 Oct 2005 19:59:25 -0000 2.246 @@ -3,10 +3,11 @@ #include "Python.h" -#include "node.h" -#include "token.h" +#include "Python-ast.h" +#include "pythonrun.h" #include "errcode.h" #include "marshal.h" +#include "code.h" #include "compile.h" #include "eval.h" #include "osdefs.h" @@ -766,17 +767,17 @@ /* Parse a source file and return the corresponding code object */ static PyCodeObject * -parse_source_module(char *pathname, FILE *fp) +parse_source_module(const char *pathname, FILE *fp) { - PyCodeObject *co; - node *n; - - n = PyParser_SimpleParseFile(fp, pathname, Py_file_input); - if (n == NULL) - return NULL; - co = PyNode_Compile(n, pathname); - PyNode_Free(n); + PyCodeObject *co = NULL; + mod_ty mod; + mod = PyParser_ASTFromFile(fp, pathname, Py_file_input, 0, 0, 0, + NULL); + if (mod) { + co = PyAST_Compile(mod, pathname, NULL); + free_mod(mod); + } return co; } Index: marshal.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/marshal.c,v retrieving revision 1.88 retrieving revision 1.89 diff -u -d -r1.88 -r1.89 --- marshal.c 16 Aug 2005 03:47:52 -0000 1.88 +++ marshal.c 20 Oct 2005 19:59:25 -0000 1.89 @@ -6,6 +6,7 @@ #include "Python.h" #include "longintrepr.h" +#include "code.h" #include "compile.h" #include "marshal.h" Index: pythonrun.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v retrieving revision 2.217 retrieving revision 2.218 diff -u -d -r2.217 -r2.218 --- 
pythonrun.c 2 Oct 2005 01:48:50 -0000 2.217 +++ pythonrun.c 20 Oct 2005 19:59:25 -0000 2.218 @@ -3,13 +3,16 @@ #include "Python.h" +#include "Python-ast.h" #include "grammar.h" #include "node.h" #include "token.h" #include "parsetok.h" #include "errcode.h" +#include "code.h" #include "compile.h" #include "symtable.h" +#include "ast.h" #include "eval.h" #include "marshal.h" @@ -32,9 +35,9 @@ /* Forward */ static void initmain(void); static void initsite(void); -static PyObject *run_err_node(node *, const char *, PyObject *, PyObject *, +static PyObject *run_err_mod(mod_ty, const char *, PyObject *, PyObject *, PyCompilerFlags *); -static PyObject *run_node(node *, const char *, PyObject *, PyObject *, +static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *, PyCompilerFlags *); static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *, PyCompilerFlags *); @@ -634,25 +637,7 @@ /* Parse input from a file and execute it */ int -PyRun_AnyFile(FILE *fp, const char *filename) -{ - return PyRun_AnyFileExFlags(fp, filename, 0, NULL); -} - -int -PyRun_AnyFileFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) -{ - return PyRun_AnyFileExFlags(fp, filename, 0, flags); -} - -int -PyRun_AnyFileEx(FILE *fp, const char *filename, int closeit) -{ - return PyRun_AnyFileExFlags(fp, filename, closeit, NULL); -} - -int -PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit, +PyRun_AnyFileExFlags(FILE *fp, char *filename, int closeit, PyCompilerFlags *flags) { if (filename == NULL) @@ -668,12 +653,6 @@ } int -PyRun_InteractiveLoop(FILE *fp, const char *filename) -{ - return PyRun_InteractiveLoopFlags(fp, filename, NULL); -} - -int PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) { PyObject *v; @@ -708,12 +687,6 @@ } } -int -PyRun_InteractiveOne(FILE *fp, const char *filename) -{ - return PyRun_InteractiveOneFlags(fp, filename, NULL); -} - /* compute parser flags based on compiler flags */ #define 
PARSER_FLAGS(flags) \ (((flags) && (flags)->cf_flags & PyCF_DONT_IMPLY_DEDENT) ? \ @@ -723,9 +696,9 @@ PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) { PyObject *m, *d, *v, *w; - node *n; - perrdetail err; + mod_ty mod; char *ps1 = "", *ps2 = ""; + int errcode = 0; v = PySys_GetObject("ps1"); if (v != NULL) { @@ -743,26 +716,25 @@ else if (PyString_Check(w)) ps2 = PyString_AsString(w); } - n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, - Py_single_input, ps1, ps2, &err, - PARSER_FLAGS(flags)); + mod = PyParser_ASTFromFile(fp, filename, + Py_single_input, ps1, ps2, + flags, &errcode); Py_XDECREF(v); Py_XDECREF(w); - if (n == NULL) { - if (err.error == E_EOF) { - if (err.text) - PyMem_DEL(err.text); + if (mod == NULL) { + if (errcode == E_EOF) { + PyErr_Clear(); return E_EOF; } - err_input(&err); PyErr_Print(); - return err.error; + return -1; } m = PyImport_AddModule("__main__"); if (m == NULL) return -1; d = PyModule_GetDict(m); - v = run_node(n, filename, d, d, flags); + v = run_mod(mod, filename, d, d, flags); + free_mod(mod); if (v == NULL) { PyErr_Print(); return -1; @@ -773,12 +745,6 @@ return 0; } -int -PyRun_SimpleFile(FILE *fp, const char *filename) -{ - return PyRun_SimpleFileEx(fp, filename, 0); -} - /* Check whether a file maybe a pyc file: Look at the extension, the file type, and, if we may close it, at the first few bytes. 
*/ @@ -820,12 +786,6 @@ } int -PyRun_SimpleFileEx(FILE *fp, const char *filename, int closeit) -{ - return PyRun_SimpleFileExFlags(fp, filename, closeit, NULL); -} - -int PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, PyCompilerFlags *flags) { @@ -874,12 +834,6 @@ } int -PyRun_SimpleString(const char *command) -{ - return PyRun_SimpleStringFlags(command, NULL); -} - -int PyRun_SimpleStringFlags(const char *command, PyCompilerFlags *flags) { PyObject *m, *d, *v; @@ -1054,6 +1008,8 @@ handle_system_exit(); } PyErr_Fetch(&exception, &v, &tb); + if (exception == NULL) + return; PyErr_NormalizeException(&exception, &v, &tb); if (exception == NULL) return; @@ -1195,74 +1151,48 @@ } PyObject * -PyRun_String(const char *str, int start, PyObject *globals, PyObject *locals) -{ - return run_err_node(PyParser_SimpleParseString(str, start), - "", globals, locals, NULL); -} - -PyObject * -PyRun_File(FILE *fp, const char *filename, int start, PyObject *globals, - PyObject *locals) -{ - return PyRun_FileEx(fp, filename, start, globals, locals, 0); -} - -PyObject * -PyRun_FileEx(FILE *fp, const char *filename, int start, PyObject *globals, - PyObject *locals, int closeit) -{ - node *n = PyParser_SimpleParseFile(fp, filename, start); - if (closeit) - fclose(fp); - return run_err_node(n, filename, globals, locals, NULL); -} - -PyObject * -PyRun_StringFlags(const char *str, int start, PyObject *globals, PyObject *locals, - PyCompilerFlags *flags) -{ - return run_err_node(PyParser_SimpleParseStringFlags( - str, start, PARSER_FLAGS(flags)), - "", globals, locals, flags); -} - -PyObject * -PyRun_FileFlags(FILE *fp, const char *filename, int start, PyObject *globals, - PyObject *locals, PyCompilerFlags *flags) +PyRun_StringFlags(const char *str, int start, PyObject *globals, + PyObject *locals, PyCompilerFlags *flags) { - return PyRun_FileExFlags(fp, filename, start, globals, locals, 0, - flags); + PyObject *ret; + mod_ty mod = PyParser_ASTFromString(str, "", start, 
flags); + ret = run_err_mod(mod, "", globals, locals, flags); + free_mod(mod); + return ret; } PyObject * PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals, PyObject *locals, int closeit, PyCompilerFlags *flags) { - node *n = PyParser_SimpleParseFileFlags(fp, filename, start, - PARSER_FLAGS(flags)); + PyObject *ret; + mod_ty mod = PyParser_ASTFromFile(fp, filename, start, 0, 0, + flags, NULL); + if (mod == NULL) + return NULL; if (closeit) fclose(fp); - return run_err_node(n, filename, globals, locals, flags); + ret = run_err_mod(mod, filename, globals, locals, flags); + free_mod(mod); + return ret; } static PyObject * -run_err_node(node *n, const char *filename, PyObject *globals, PyObject *locals, - PyCompilerFlags *flags) +run_err_mod(mod_ty mod, const char *filename, PyObject *globals, + PyObject *locals, PyCompilerFlags *flags) { - if (n == NULL) + if (mod == NULL) return NULL; - return run_node(n, filename, globals, locals, flags); + return run_mod(mod, filename, globals, locals, flags); } static PyObject * -run_node(node *n, const char *filename, PyObject *globals, PyObject *locals, +run_mod(mod_ty mod, const char *filename, PyObject *globals, PyObject *locals, PyCompilerFlags *flags) { PyCodeObject *co; PyObject *v; - co = PyNode_CompileFlags(n, filename, flags); - PyNode_Free(n); + co = PyAST_Compile(mod, filename, flags); if (co == NULL) return NULL; v = PyEval_EvalCode(co, globals, locals); @@ -1271,8 +1201,8 @@ } static PyObject * -run_pyc_file(FILE *fp, const char *filename, PyObject *globals, PyObject *locals, - PyCompilerFlags *flags) +run_pyc_file(FILE *fp, const char *filename, PyObject *globals, + PyObject *locals, PyCompilerFlags *flags) { PyCodeObject *co; PyObject *v; @@ -1303,41 +1233,77 @@ } PyObject * -Py_CompileString(const char *str, const char *filename, int start) -{ - return Py_CompileStringFlags(str, filename, start, NULL); -} - -PyObject * Py_CompileStringFlags(const char *str, const char *filename, int 
start, PyCompilerFlags *flags) { - node *n; + mod_ty mod; PyCodeObject *co; - - n = PyParser_SimpleParseStringFlagsFilename(str, filename, start, - PARSER_FLAGS(flags)); - if (n == NULL) + mod = PyParser_ASTFromString(str, filename, start, flags); + if (mod == NULL) return NULL; - co = PyNode_CompileFlags(n, filename, flags); - PyNode_Free(n); + co = PyAST_Compile(mod, filename, flags); + free_mod(mod); return (PyObject *)co; } struct symtable * Py_SymtableString(const char *str, const char *filename, int start) { - node *n; + mod_ty mod; struct symtable *st; - n = PyParser_SimpleParseStringFlagsFilename(str, filename, - start, 0); - if (n == NULL) + + mod = PyParser_ASTFromString(str, filename, start, NULL); + if (mod == NULL) return NULL; - st = PyNode_CompileSymtable(n, filename); - PyNode_Free(n); + st = PySymtable_Build(mod, filename, 0); + free_mod(mod); return st; } +/* Preferred access to parser is through AST. */ +mod_ty +PyParser_ASTFromString(const char *s, const char *filename, int start, + PyCompilerFlags *flags) +{ + node *n; + mod_ty mod; + perrdetail err; + n = PyParser_ParseStringFlagsFilename(s, filename, &_PyParser_Grammar, + start, &err, + PARSER_FLAGS(flags)); + if (n) { + mod = PyAST_FromNode(n, flags, filename); + PyNode_Free(n); + return mod; + } + else { + err_input(&err); + return NULL; + } +} + +mod_ty +PyParser_ASTFromFile(FILE *fp, const char *filename, int start, char *ps1, + char *ps2, PyCompilerFlags *flags, int *errcode) +{ + node *n; + mod_ty mod; + perrdetail err; + n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, start, + ps1, ps2, &err, PARSER_FLAGS(flags)); + if (n) { + mod = PyAST_FromNode(n, flags, filename); + PyNode_Free(n); + return mod; + } + else { + err_input(&err); + if (errcode) + *errcode = err.error; + return NULL; + } +} + /* Simplified interface to parsefile -- return node or set exception */ node * @@ -1349,15 +1315,10 @@ (char *)0, (char *)0, &err, flags); if (n == NULL) err_input(&err); + return n; 
} -node * -PyParser_SimpleParseFile(FILE *fp, const char *filename, int start) -{ - return PyParser_SimpleParseFileFlags(fp, filename, start, 0); -} - /* Simplified interface to parsestring -- return node or set exception */ node * @@ -1373,12 +1334,6 @@ } node * -PyParser_SimpleParseString(const char *str, int start) -{ - return PyParser_SimpleParseStringFlags(str, start, 0); -} - -node * PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename, int start, int flags) { @@ -1418,12 +1373,6 @@ PyObject* u = NULL; char *msg = NULL; errtype = PyExc_SyntaxError; - v = Py_BuildValue("(ziiz)", err->filename, - err->lineno, err->offset, err->text); - if (err->text != NULL) { - PyMem_DEL(err->text); - err->text = NULL; - } switch (err->error) { case E_SYNTAX: errtype = PyExc_IndentationError; @@ -1450,11 +1399,9 @@ case E_INTR: if (!PyErr_Occurred()) PyErr_SetNone(PyExc_KeyboardInterrupt); - Py_XDECREF(v); return; case E_NOMEM: PyErr_NoMemory(); - Py_XDECREF(v); return; case E_EOF: msg = "unexpected EOF while parsing"; @@ -1498,7 +1445,15 @@ msg = "unknown parsing error"; break; } - w = Py_BuildValue("(sO)", msg, v); + v = Py_BuildValue("(ziiz)", err->filename, + err->lineno, err->offset, err->text); + if (err->text != NULL) { + PyMem_DEL(err->text); + err->text = NULL; + } + w = NULL; + if (v != NULL) + w = Py_BuildValue("(sO)", msg, v); Py_XDECREF(u); Py_XDECREF(v); PyErr_SetObject(errtype, w); @@ -1687,3 +1642,20 @@ return oldhandler; #endif } + +/* Deprecated C API functions still provided for binary compatiblity */ + +#undef PyParser_SimpleParseFile +#undef PyParser_SimpleParseString + +node * +PyParser_SimpleParseFile(FILE *fp, const char *filename, int start) +{ + return PyParser_SimpleParseFileFlags(fp, filename, start, 0); +} + +node * +PyParser_SimpleParseString(const char *str, int start) +{ + return PyParser_SimpleParseStringFlags(str, start, 0); +} Index: symtable.c =================================================================== RCS 
file: /cvsroot/python/python/dist/src/Python/symtable.c,v retrieving revision 2.12 retrieving revision 2.13 diff -u -d -r2.12 -r2.13 --- symtable.c 19 May 2004 08:20:32 -0000 2.12 +++ symtable.c 20 Oct 2005 19:59:25 -0000 2.13 @@ -1,48 +1,35 @@ #include "Python.h" +#include "Python-ast.h" +#include "code.h" #include "compile.h" #include "symtable.h" -#include "graminit.h" #include "structmember.h" -/* The compiler uses this function to load a PySymtableEntry object - for a code block. Each block is loaded twice, once during the - symbol table pass and once during the code gen pass. Entries [...1286 lines suppressed...] + /* Outermost iterator is evaluated in current scope */ + VISIT(st, expr, outermost->iter); + /* Create generator scope for the rest */ + tmp = PyString_FromString(""); + if (!symtable_enter_block(st, tmp, FunctionBlock, (void *)e, 0)) { + return 0; + } + st->st_cur->ste_generator = 1; + /* Outermost iter is received as an argument */ + if (!symtable_implicit_arg(st, 0)) { + return 0; + } + VISIT(st, expr, outermost->target); + VISIT_SEQ(st, expr, outermost->ifs); + VISIT_SEQ_TAIL(st, comprehension, e->v.GeneratorExp.generators, 1); + VISIT(st, expr, e->v.GeneratorExp.elt); + if (!symtable_exit_block(st, (void *)e)) + return 0; + return 1; +} Index: sysmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/sysmodule.c,v retrieving revision 2.130 retrieving revision 2.131 diff -u -d -r2.130 -r2.131 --- sysmodule.c 3 Oct 2005 00:54:57 -0000 2.130 +++ sysmodule.c 20 Oct 2005 19:59:25 -0000 2.131 @@ -15,7 +15,7 @@ */ #include "Python.h" -#include "compile.h" +#include "code.h" #include "frameobject.h" #include "eval.h" Index: traceback.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/traceback.c,v retrieving revision 2.42 retrieving revision 2.43 diff -u -d -r2.42 -r2.43 --- traceback.c 21 Mar 2004 18:37:23 -0000 
2.42 +++ traceback.c 20 Oct 2005 19:59:25 -0000 2.43 @@ -3,7 +3,7 @@ #include "Python.h" -#include "compile.h" +#include "code.h" #include "frameobject.h" #include "structmember.h" #include "osdefs.h" From nnorwitz at users.sourceforge.net Fri Oct 21 06:19:52 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 06:19:52 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include symtable.h,2.14,2.15 Message-ID: <20051021041952.D56671E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22365/Include Modified Files: symtable.h Log Message: Remove dup declarations after AST merge Index: symtable.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/symtable.h,v retrieving revision 2.14 retrieving revision 2.15 diff -u -d -r2.14 -r2.15 --- symtable.h 20 Oct 2005 19:59:24 -0000 2.14 +++ symtable.h 21 Oct 2005 04:19:49 -0000 2.15 @@ -96,9 +96,6 @@ #define GENERATOR 1 #define GENERATOR_EXPRESSION 2 -#define GENERATOR 1 -#define GENERATOR_EXPRESSION 2 - #ifdef __cplusplus } #endif From nnorwitz at users.sourceforge.net Fri Oct 21 06:23:39 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 06:23:39 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include code.h,2.1,2.2 Message-ID: <20051021042339.628DB1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22879/Include Modified Files: code.h Log Message: Use the newer names for APIs after the AST merge Index: code.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/code.h,v retrieving revision 2.1 retrieving revision 2.2 diff -u -d -r2.1 -r2.2 --- code.h 20 Oct 2005 19:59:24 -0000 2.1 +++ code.h 21 Oct 2005 04:23:36 -0000 2.2 @@ -50,17 +50,17 
@@ #define CO_MAXBLOCKS 20 /* Max static block nesting within a function */ -extern DL_IMPORT(PyTypeObject) PyCode_Type; +PyAPI_DATA(PyTypeObject) PyCode_Type; #define PyCode_Check(op) ((op)->ob_type == &PyCode_Type) #define PyCode_GetNumFree(op) (PyTuple_GET_SIZE((op)->co_freevars)) /* Public interface */ -DL_IMPORT(PyCodeObject *) PyCode_New( +PyAPI_FUNC(PyCodeObject *) PyCode_New( int, int, int, int, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, int, PyObject *); /* same as struct above */ -DL_IMPORT(int) PyCode_Addr2Line(PyCodeObject *, int); +PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); /* for internal use only */ #define _PyCode_GETCODEPTR(co, pp) \ From nnorwitz at users.sourceforge.net Fri Oct 21 06:28:41 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 06:28:41 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ceval.c,2.428,2.429 Message-ID: <20051021042841.431721E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23590/Python Modified Files: ceval.c Log Message: Fix some mods that got dropped from the AST merge Index: ceval.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ceval.c,v retrieving revision 2.428 retrieving revision 2.429 diff -u -d -r2.428 -r2.429 --- ceval.c 20 Oct 2005 19:59:25 -0000 2.428 +++ ceval.c 21 Oct 2005 04:28:38 -0000 2.429 @@ -543,7 +543,7 @@ #ifdef LLTRACE int lltrace; #endif -#if defined(Py_DEBUG) +#if defined(Py_DEBUG) || defined(LLTRACE) /* Make it easier to find out where we are with a debugger */ char *filename; #endif @@ -745,7 +745,7 @@ #ifdef LLTRACE lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL; #endif -#if defined(Py_DEBUG) +#if defined(Py_DEBUG) || defined(LLTRACE) filename = PyString_AsString(co->co_filename); #endif From nnorwitz at 
users.sourceforge.net Fri Oct 21 06:33:05 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 06:33:05 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include pyport.h,2.72,2.73 Message-ID: <20051021043305.723241E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24053/Include Modified Files: pyport.h Log Message: Remove INT_MIN that came from the AST merge. INT_MIN is used in Python/compile.c, but it was also used in Objects/abstract.c Python/getargs.c. If we need it for compile.c, we can get it from the same place as the other files. Index: pyport.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pyport.h,v retrieving revision 2.72 retrieving revision 2.73 diff -u -d -r2.72 -r2.73 --- pyport.h 20 Oct 2005 19:59:24 -0000 2.72 +++ pyport.h 21 Oct 2005 04:33:02 -0000 2.73 @@ -583,7 +583,6 @@ #ifndef INT_MAX #define INT_MAX 2147483647 -#define INT_MIN (-INT_MAX - 1) #endif #ifndef LONG_MAX From nnorwitz at users.sourceforge.net Fri Oct 21 06:34:21 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 06:34:21 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_generators.py, 1.49, 1.50 Message-ID: <20051021043421.24ACD1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24342/Lib/test Modified Files: test_generators.py Log Message: Add comment lost from AST merge Index: test_generators.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_generators.py,v retrieving revision 1.49 retrieving revision 1.50 diff -u -d -r1.49 -r1.50 --- test_generators.py 20 Oct 2005 19:59:24 -0000 1.49 +++ test_generators.py 21 Oct 2005 04:34:18 -0000 1.50 @@ -774,7 +774,7 @@ ... 
try: ... 1//0 ... except ZeroDivisionError: -... yield 666 +... yield 666 # bad because *outer* try has finally ... except: ... pass ... finally: From nnorwitz at users.sourceforge.net Fri Oct 21 07:15:10 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 07:15:10 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c,2.1,2.2 Message-ID: <20051021051510.A34181E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30117/Python Modified Files: ast.c Log Message: Oops, "=" is not a comparison operator Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ast.c,v retrieving revision 2.1 retrieving revision 2.2 diff -u -d -r2.1 -r2.2 --- ast.c 20 Oct 2005 19:59:25 -0000 2.1 +++ ast.c 21 Oct 2005 05:15:07 -0000 2.2 @@ -471,7 +471,6 @@ case GREATER: return Gt; case EQEQUAL: /* == */ - case EQUAL: return Eq; case LESSEQUAL: return LtE; From nnorwitz at users.sourceforge.net Fri Oct 21 08:00:28 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:00:28 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging __init__.py, 1.32, 1.33 Message-ID: <20051021060028.777F81E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5235/Lib/logging Modified Files: __init__.py Log Message: Don't use a string exception since it's deprecated Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/__init__.py,v retrieving revision 1.32 retrieving revision 1.33 diff -u -d -r1.32 -r1.33 --- __init__.py 14 Oct 2005 09:36:35 -0000 1.32 +++ __init__.py 21 Oct 2005 06:00:24 -0000 1.33 @@ -64,7 +64,7 @@ def currentframe(): """Return the frame object for the caller's stack 
frame.""" try: - raise 'catch me' + raise Exception except: return sys.exc_traceback.tb_frame.f_back From nnorwitz at users.sourceforge.net Fri Oct 21 08:01:01 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:01:01 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging __init__.py, 1.24.2.4, 1.24.2.5 Message-ID: <20051021060101.A019C1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5339/Lib/logging Modified Files: Tag: release24-maint __init__.py Log Message: Don't use a string exception since it's deprecated Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/__init__.py,v retrieving revision 1.24.2.4 retrieving revision 1.24.2.5 diff -u -d -r1.24.2.4 -r1.24.2.5 --- __init__.py 14 Oct 2005 09:37:54 -0000 1.24.2.4 +++ __init__.py 21 Oct 2005 06:00:58 -0000 1.24.2.5 @@ -64,7 +64,7 @@ def currentframe(): """Return the frame object for the caller's stack frame.""" try: - raise 'catch me' + raise Exception except: return sys.exc_traceback.tb_frame.f_back From nnorwitz at users.sourceforge.net Fri Oct 21 08:05:36 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:05:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Parser tokenizer.c,2.79,2.80 Message-ID: <20051021060536.5A69C1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Parser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6422/Parser Modified Files: tokenizer.c Log Message: Free coding spec (cs) if there was an error to prevent mem leak. 
Maybe backport candidate Index: tokenizer.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.c,v retrieving revision 2.79 retrieving revision 2.80 diff -u -d -r2.79 -r2.80 --- tokenizer.c 2 Oct 2005 01:48:51 -0000 2.79 +++ tokenizer.c 21 Oct 2005 06:05:33 -0000 2.80 @@ -277,11 +277,14 @@ tok->encoding = cs; tok->decoding_state = -1; } + else + PyMem_DEL(cs); #else /* Without Unicode support, we cannot process the coding spec. Since there won't be any Unicode literals, that won't matter. */ + PyMem_DEL(cs); #endif } } else { /* then, compare cs with BOM */ From nnorwitz at users.sourceforge.net Fri Oct 21 08:24:05 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:24:05 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Grammar Grammar,1.54,1.55 Message-ID: <20051021062405.466C71E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Grammar In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8692/Grammar Modified Files: Grammar Log Message: Fix SF bug #1167751, Argument genexp corner case Incorrect code was generated for: foo(a = i for i in range(10)) This should have generated a SyntaxError. Fix the Grammar so it raises a SyntaxError and test it. I'm uncertain whether this should be backported. It makes something that was Syntactically valid invalid. However, the code would either be completely broken or do the wrong thing. 
Index: Grammar =================================================================== RCS file: /cvsroot/python/python/dist/src/Grammar/Grammar,v retrieving revision 1.54 retrieving revision 1.55 diff -u -d -r1.54 -r1.55 --- Grammar 2 Aug 2005 00:46:38 -0000 1.54 +++ Grammar 21 Oct 2005 06:24:02 -0000 1.55 @@ -102,7 +102,7 @@ classdef: 'class' NAME ['(' [testlist] ')'] ':' suite arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) -argument: [test '='] test [gen_for] # Really [keyword '='] test +argument: test [gen_for] | test '=' test ['(' gen_for ')'] # Really [keyword '='] test list_iter: list_for | list_if list_for: 'for' exprlist 'in' testlist_safe [list_iter] From nnorwitz at users.sourceforge.net Fri Oct 21 08:24:05 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:24:05 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python graminit.c,2.40,2.41 Message-ID: <20051021062405.6210C1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8692/Python Modified Files: graminit.c Log Message: Fix SF bug #1167751, Argument genexp corner case Incorrect code was generated for: foo(a = i for i in range(10)) This should have generated a SyntaxError. Fix the Grammar so it raises a SyntaxError and test it. I'm uncertain whether this should be backported. It makes something that was Syntactically valid invalid. However, the code would either be completely broken or do the wrong thing. 
Index: graminit.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/graminit.c,v retrieving revision 2.40 retrieving revision 2.41 diff -u -d -r2.40 -r2.41 --- graminit.c 2 Aug 2005 00:46:46 -0000 2.40 +++ graminit.c 21 Oct 2005 06:24:02 -0000 2.41 @@ -1496,26 +1496,34 @@ {26, 1}, }; static arc arcs_69_1[3] = { - {25, 2}, - {147, 3}, + {147, 2}, + {25, 3}, {0, 1}, }; static arc arcs_69_2[1] = { - {26, 4}, + {0, 2}, }; static arc arcs_69_3[1] = { - {0, 3}, + {26, 4}, }; static arc arcs_69_4[2] = { - {147, 3}, + {13, 5}, {0, 4}, }; -static state states_69[5] = { +static arc arcs_69_5[1] = { + {147, 6}, +}; +static arc arcs_69_6[1] = { + {15, 2}, +}; +static state states_69[7] = { {1, arcs_69_0}, {3, arcs_69_1}, {1, arcs_69_2}, {1, arcs_69_3}, {2, arcs_69_4}, + {1, arcs_69_5}, + {1, arcs_69_6}, }; static arc arcs_70_0[2] = { {146, 1}, @@ -1806,7 +1814,7 @@ "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\002\000"}, {324, "arglist", 0, 8, states_68, "\000\040\010\060\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, - {325, "argument", 0, 5, states_69, + {325, "argument", 0, 7, states_69, "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000\000"}, {326, "list_iter", 0, 2, states_70, "\000\000\000\000\000\000\000\000\000\000\000\042\000\000\000\000\000\000\000\000\000"}, From nnorwitz at users.sourceforge.net Fri Oct 21 08:24:05 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:24:05 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_genexps.py, 1.9, 1.10 Message-ID: <20051021062405.8360E1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8692/Lib/test Modified Files: test_genexps.py Log Message: Fix SF bug #1167751, Argument genexp corner case Incorrect code was generated for: 
foo(a = i for i in range(10)) This should have generated a SyntaxError. Fix the Grammar so it raises a SyntaxError and test it. I'm uncertain whether this should be backported. It makes something that was Syntactically valid invalid. However, the code would either be completely broken or do the wrong thing. Index: test_genexps.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_genexps.py,v retrieving revision 1.9 retrieving revision 1.10 diff -u -d -r1.9 -r1.10 --- test_genexps.py 20 Oct 2005 19:59:24 -0000 1.9 +++ test_genexps.py 21 Oct 2005 06:24:02 -0000 1.10 @@ -82,6 +82,18 @@ ... SyntaxError: invalid syntax +Verify that parenthesis are required when used as a keyword argument value + + >>> dict(a = i for i in xrange(10)) + Traceback (most recent call last): + ... + SyntaxError: invalid syntax + +Verify that parenthesis are required when used as a keyword argument value + + >>> dict(a = (i for i in xrange(10))) #doctest: +ELLIPSIS + {'a': <generator object at ...>} + Verify early binding for the outermost for-expression >>> x=10 @@ -125,12 +137,12 @@ >>> (y for y in (1,2)) = 10 Traceback (most recent call last): ... - SyntaxError: assignment to generator expression not possible (<doctest test.test_genexps.__test__.doctests[38]>, line 1) + SyntaxError: assignment to generator expression not possible (<doctest test.test_genexps.__test__.doctests[40]>, line 1) >>> (y for y in (1,2)) += 10 Traceback (most recent call last): ...
- SyntaxError: augmented assignment to generator expression not possible (<doctest test.test_genexps.__test__.doctests[39]>, line 1) + SyntaxError: augmented assignment to generator expression not possible (<doctest test.test_genexps.__test__.doctests[41]>, line 1) ########### Tests borrowed from or inspired by test_generators.py ############ From nnorwitz at users.sourceforge.net Fri Oct 21 08:25:37 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:25:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1390,1.1391 Message-ID: <20051021062537.4FA9B1E4338@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9310/Misc Modified Files: NEWS Log Message: Incorrect code was generated for: foo(a = i for i in range(10)) This should have generated a SyntaxError. Fix the Grammar so it raises a SyntaxError and test it. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1390 retrieving revision 1.1391 diff -u -d -r1.1390 -r1.1391 --- NEWS 9 Oct 2005 19:42:27 -0000 1.1390 +++ NEWS 21 Oct 2005 06:25:33 -0000 1.1391 @@ -12,6 +12,9 @@ Core and builtins ----------------- +- SF bug #116775: fix incorrect code being for generator expressions. + The following code now raises a SyntaxError: foo(a = i for i in range(10)) + - SF Bug #976608: fix SystemError when mtime of an imported file is -1. - SF Bug #887946: fix segfault when redirecting stdin from a directory.
From nnorwitz at users.sourceforge.net Fri Oct 21 08:26:13 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:26:13 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1391,1.1392 Message-ID: <20051021062613.5E1981E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9378/Misc Modified Files: NEWS Log Message: Get the bug # write Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1391 retrieving revision 1.1392 diff -u -d -r1.1391 -r1.1392 --- NEWS 21 Oct 2005 06:25:33 -0000 1.1391 +++ NEWS 21 Oct 2005 06:26:10 -0000 1.1392 @@ -12,7 +12,7 @@ Core and builtins ----------------- -- SF bug #116775: fix incorrect code being for generator expressions. +- SF bug #1167751: fix incorrect code being for generator expressions. The following code now raises a SyntaxError: foo(a = i for i in range(10)) - SF Bug #976608: fix SystemError when mtime of an imported file is -1. From nnorwitz at users.sourceforge.net Fri Oct 21 08:32:05 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Fri, 21 Oct 2005 08:32:05 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1392,1.1393 Message-ID: <20051021063205.DEF4E1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10156/Misc Modified Files: NEWS Log Message: add some notes about recent checkins Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1392 retrieving revision 1.1393 diff -u -d -r1.1392 -r1.1393 --- NEWS 21 Oct 2005 06:26:10 -0000 1.1392 +++ NEWS 21 Oct 2005 06:32:02 -0000 1.1393 @@ -12,6 +12,10 @@ Core and builtins ----------------- +- Speed up some Unicode operations. 
+ +- A new AST parser implementation was completed. + - SF bug #1167751: fix incorrect code being for generator expressions. The following code now raises a SyntaxError: foo(a = i for i in range(10)) @@ -165,6 +169,8 @@ Extension Modules ----------------- +- Get bsddb module to build with BSD DB version 3.2 + - Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format From birkenfeld at users.sourceforge.net Fri Oct 21 09:53:48 2005 From: birkenfeld at users.sourceforge.net (birkenfeld@users.sourceforge.net) Date: Fri, 21 Oct 2005 09:53:48 +0200 (CEST) Subject: [Python-checkins] python/dist/src LICENSE,1.34,1.35 Message-ID: <20051021075348.575081E4004@bag.python.org> Update of /cvsroot/python/python/dist/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26336 Modified Files: LICENSE Log Message: Add 2.4.2 to LICENSE. Index: LICENSE =================================================================== RCS file: /cvsroot/python/python/dist/src/LICENSE,v retrieving revision 1.34 retrieving revision 1.35 diff -u -d -r1.34 -r1.35 --- LICENSE 10 Mar 2005 13:47:17 -0000 1.34 +++ LICENSE 21 Oct 2005 07:53:44 -0000 1.35 @@ -50,6 +50,7 @@ 2.3.5 2.3.4 2005 PSF yes 2.4 2.3 2004 PSF yes 2.4.1 2.4 2005 PSF yes + 2.4.2 2.4.1 2005 PSF yes Footnotes: From mwh at users.sourceforge.net Fri Oct 21 13:32:25 2005 From: mwh at users.sourceforge.net (mwh@users.sourceforge.net) Date: Fri, 21 Oct 2005 13:32:25 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python import.c,2.246,2.247 Message-ID: <20051021113225.3F6211E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11048 Modified Files: import.c Log Message: the ast-branch changed the stack discipline of MAKE_CLOSURE, so we need to bump MAGIC. 
Index: import.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/import.c,v retrieving revision 2.246 retrieving revision 2.247 diff -u -d -r2.246 -r2.247 --- import.c 20 Oct 2005 19:59:25 -0000 2.246 +++ import.c 21 Oct 2005 11:32:20 -0000 2.247 @@ -52,8 +52,10 @@ Python 2.4a3: 62051 Python 2.4b1: 62061 Python 2.5a0: 62071 + Python 2.5a0: 62081 (ast-branch) +. */ -#define MAGIC (62071 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (62081 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the From mwh at users.sourceforge.net Fri Oct 21 13:45:04 2005 From: mwh at users.sourceforge.net (mwh@users.sourceforge.net) Date: Fri, 21 Oct 2005 13:45:04 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects stringobject.c, 2.234, 2.235 unicodeobject.c, 2.234, 2.235 Message-ID: <20051021114504.732B11E400F@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12867/Objects Modified Files: stringobject.c unicodeobject.c Log Message: Fix bug: [ 1327110 ] wrong TypeError traceback in generator expressions by removing the code that can stomp on the users' TypeError raised by the iterable argument to ''.join() -- PySequence_Fast (now?) gives a perfectly reasonable message itself. Also, a couple of tests. 
Index: stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.234 retrieving revision 2.235 diff -u -d -r2.234 -r2.235 --- stringobject.c 20 Oct 2005 04:15:52 -0000 2.234 +++ stringobject.c 21 Oct 2005 11:45:00 -0000 2.235 @@ -1620,10 +1620,6 @@ seq = PySequence_Fast(orig, ""); if (seq == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, - "sequence expected, %.80s found", - orig->ob_type->tp_name); return NULL; } Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.234 retrieving revision 2.235 diff -u -d -r2.234 -r2.235 --- unicodeobject.c 19 Oct 2005 22:39:02 -0000 2.234 +++ unicodeobject.c 21 Oct 2005 11:45:00 -0000 2.235 @@ -4148,10 +4148,6 @@ fseq = PySequence_Fast(seq, ""); if (fseq == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, - "sequence expected, %.80s found", - seq->ob_type->tp_name); return NULL; } From mwh at users.sourceforge.net Fri Oct 21 13:45:04 2005 From: mwh at users.sourceforge.net (mwh@users.sourceforge.net) Date: Fri, 21 Oct 2005 13:45:04 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_string.py, 1.27, 1.28 string_tests.py, 1.45, 1.46 Message-ID: <20051021114504.AA3AB1E4008@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12867/Lib/test Modified Files: test_string.py string_tests.py Log Message: Fix bug: [ 1327110 ] wrong TypeError traceback in generator expressions by removing the code that can stomp on the users' TypeError raised by the iterable argument to ''.join() -- PySequence_Fast (now?) gives a perfectly reasonable message itself. Also, a couple of tests. 
Index: test_string.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_string.py,v retrieving revision 1.27 retrieving revision 1.28 diff -u -d -r1.27 -r1.28 --- test_string.py 26 Aug 2004 16:53:04 -0000 1.27 +++ test_string.py 21 Oct 2005 11:45:01 -0000 1.28 @@ -51,6 +51,17 @@ self.checkraises(TypeError, string_tests.BadSeq1(), 'join', ' ') self.checkequal('a b c', string_tests.BadSeq2(), 'join', ' ') + try: + def f(): + yield 4 + "" + self.fixtype(' ').join(f()) + except TypeError, e: + if '+' not in str(e): + self.fail('join() ate exception message') + else: + self.fail('exception not raised') + + class ModuleTest(unittest.TestCase): Index: string_tests.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/string_tests.py,v retrieving revision 1.45 retrieving revision 1.46 diff -u -d -r1.45 -r1.46 --- string_tests.py 28 Jul 2005 16:49:15 -0000 1.45 +++ string_tests.py 21 Oct 2005 11:45:01 -0000 1.46 @@ -657,6 +657,15 @@ self.checkraises(TypeError, ' ', 'join') self.checkraises(TypeError, ' ', 'join', 7) self.checkraises(TypeError, ' ', 'join', Sequence([7, 'hello', 123L])) + try: + def f(): + yield 4 + "" + self.fixtype(' ').join(f()) + except TypeError, e: + if '+' not in str(e): + self.fail('join() ate exception message') + else: + self.fail('exception not raised') def test_formatting(self): self.checkequal('+hello+', '+%s+', '__mod__', 'hello') From arigo at users.sourceforge.net Fri Oct 21 14:57:36 2005 From: arigo at users.sourceforge.net (arigo@users.sourceforge.net) Date: Fri, 21 Oct 2005 14:57:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c, 2.2, 2.3 compile.c, 2.353, 2.354 symtable.c, 2.13, 2.14 Message-ID: <20051021125736.DC2D11E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32569/Python Modified Files: ast.c 
compile.c symtable.c Log Message: ANSI-C-ify the placement of local var declarations. Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ast.c,v retrieving revision 2.2 retrieving revision 2.3 diff -u -d -r2.2 -r2.3 --- ast.c 21 Oct 2005 05:15:07 -0000 2.2 +++ ast.c 21 Oct 2005 12:57:31 -0000 2.3 @@ -512,14 +512,14 @@ seq_for_testlist(struct compiling *c, const node *n) { /* testlist: test (',' test)* [','] */ + asdl_seq *seq; + expr_ty expression; + int i; assert(TYPE(n) == testlist || TYPE(n) == listmaker || TYPE(n) == testlist_gexp || TYPE(n) == testlist_safe ); - asdl_seq *seq; - expr_ty expression; - int i; seq = asdl_seq_new((NCH(n) + 1) / 2); if (!seq) @@ -641,12 +641,13 @@ compiler_complex_args(CHILD(ch, 1))); } else if (TYPE(CHILD(ch, 0)) == NAME) { + expr_ty name; if (!strcmp(STR(CHILD(ch, 0)), "None")) { ast_error(CHILD(ch, 0), "assignment to None"); goto error; } - expr_ty name = Name(NEW_IDENTIFIER(CHILD(ch, 0)), - Param, LINENO(ch)); + name = Name(NEW_IDENTIFIER(CHILD(ch, 0)), + Param, LINENO(ch)); if (!name) goto error; asdl_seq_APPEND(args, name); @@ -1897,12 +1898,13 @@ if (!targets) return NULL; for (i = 0; i < NCH(n) - 2; i += 2) { + expr_ty e; node *ch = CHILD(n, i); if (TYPE(ch) == yield_expr) { ast_error(ch, "assignment to yield expression not possible"); goto error; } - expr_ty e = ast_for_testlist(c, ch, 0); + e = ast_for_testlist(c, ch, 0); /* set context to assign */ if (!e) Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.353 retrieving revision 2.354 diff -u -d -r2.353 -r2.354 --- compile.c 20 Oct 2005 19:59:25 -0000 2.353 +++ compile.c 21 Oct 2005 12:57:31 -0000 2.354 @@ -2329,8 +2329,9 @@ src = dot + 1; while (dot) { /* NB src is only defined when dot != NULL */ + PyObject *attr; dot = strchr(src, '.'); - PyObject *attr = 
PyString_FromStringAndSize(src, + attr = PyString_FromStringAndSize(src, dot ? dot - src : strlen(src)); ADDOP_O(c, LOAD_ATTR, attr, names); src = dot + 1; Index: symtable.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/symtable.c,v retrieving revision 2.13 retrieving revision 2.14 diff -u -d -r2.13 -r2.14 --- symtable.c 20 Oct 2005 19:59:25 -0000 2.13 +++ symtable.c 21 Oct 2005 12:57:31 -0000 2.14 @@ -444,12 +444,13 @@ static int check_unoptimized(const PySTEntryObject* ste) { char buf[300]; + const char* trailer; if (ste->ste_type == ModuleBlock || !ste->ste_unoptimized || !(ste->ste_free || ste->ste_child_free)) return 1; - const char* trailer = (ste->ste_child_free ? + trailer = (ste->ste_child_free ? "contains a nested function with free variables" : "is a nested function"); @@ -621,8 +622,9 @@ /* Recursively call analyze_block() on each child block */ for (i = 0; i < PyList_GET_SIZE(ste->ste_children); ++i) { PyObject *c = PyList_GET_ITEM(ste->ste_children, i); + PySTEntryObject* entry; assert(c && PySTEntry_Check(c)); - PySTEntryObject* entry = (PySTEntryObject*)c; + entry = (PySTEntryObject*)c; if (!analyze_block(entry, newbound, newfree, newglobal)) goto error; if (entry->ste_free || entry->ste_child_free) From lemburg at users.sourceforge.net Fri Oct 21 15:45:20 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Fri, 21 Oct 2005 15:45:20 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/scripts gencodec.py, 1.9, NONE Message-ID: <20051021134520.AD61B1E40EC@bag.python.org> Update of /cvsroot/python/python/dist/src/Tools/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13659/Tools/scripts Removed Files: gencodec.py Log Message: Moved gencodec.py to the Tools/unicode/ directory. Added new support for decoding tables. Cleaned up the implementation a bit. 
--- gencodec.py DELETED --- From lemburg at users.sourceforge.net Fri Oct 21 15:45:21 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Fri, 21 Oct 2005 15:45:21 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/unicode gencodec.py, NONE, 1.1 Message-ID: <20051021134521.3F0AD1E40FF@bag.python.org> Update of /cvsroot/python/python/dist/src/Tools/unicode In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13659/Tools/unicode Added Files: gencodec.py Log Message: Moved gencodec.py to the Tools/unicode/ directory. Added new support for decoding tables. Cleaned up the implementation a bit. --- NEW FILE: gencodec.py --- """ Unicode Mapping Parser and Codec Generator. This script parses Unicode mapping files as available from the Unicode site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec modules from them. The codecs use the standard character mapping codec to actually apply the mapping. Synopsis: gencodec.py dir codec_prefix All files in dir are scanned and those producing non-empty mappings will be written to <codec_prefix><mapname>.py with <mapname> being the first part of the map's filename ('a' in a.b.c.txt) converted to lowercase with hyphens replaced by underscores. The tool also writes marshalled versions of the mapping tables to the same location (with .mapping extension). Written by Marc-Andre Lemburg (mal at lemburg.com). Modified to generate Unicode table maps for decoding. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright Guido van Rossum, 2000. (c) Copyright Marc-Andre Lemburg, 2005.
"""#" import re, os, time, marshal, codecs # Maximum allowed size of charmap tables MAX_TABLE_SIZE = 8192 # Standard undefined Unicode code point UNI_UNDEFINED = unichr(0xFFFE) mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)' '\s+' '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)' '\s*' '(#.+)?') def parsecodes(codes, len=len, filter=filter,range=range): """ Converts code combinations to either a single code integer or a tuple of integers. meta-codes (in angular brackets, e.g. and ) are ignored. Empty codes or illegal ones are returned as None. """ if not codes: return None l = codes.split('+') if len(l) == 1: return int(l[0],16) for i in range(len(l)): try: l[i] = int(l[i],16) except ValueError: l[i] = None l = filter(lambda x: x is not None, l) if len(l) == 1: return l[0] else: return tuple(l) def readmap(filename): f = open(filename,'r') lines = f.readlines() f.close() enc2uni = {} identity = [] unmapped = range(256) # UTC mapping tables per convention don't include the identity # mappings for code points 0x00 - 0x1F and 0x7F, unless these are # explicitly mapped to different characters or undefined for i in range(32) + [127]: identity.append(i) unmapped.remove(i) enc2uni[i] = (i, 'CONTROL CHARACTER') for line in lines: line = line.strip() if not line or line[0] == '#': continue m = mapRE.match(line) if not m: #print '* not matched: %s' % repr(line) continue enc,uni,comment = m.groups() enc = parsecodes(enc) uni = parsecodes(uni) if comment is None: comment = '' else: comment = comment[1:].strip() if enc < 256: if enc in unmapped: unmapped.remove(enc) if enc == uni: identity.append(enc) enc2uni[enc] = (uni,comment) else: enc2uni[enc] = (uni,comment) # If there are more identity-mapped entries than unmapped entries, # it pays to generate an identity dictionary first, and add explicit # mappings to None for the rest if len(identity) >= len(unmapped): for enc in unmapped: enc2uni[enc] = (None, "") enc2uni['IDENTITY'] = 256 return enc2uni def hexrepr(t): if t is None: return 
'None' try: len(t) except: return '0x%04x' % t try: return '(' + ', '.join(map(lambda t: '0x%04x' % t, t)) + ')' except TypeError, why: print '* failed to convert %r: %s' % (t, why) raise def python_mapdef_code(varname, map, comments=1): l = [] append = l.append if map.has_key("IDENTITY"): append("%s = codecs.make_identity_dict(range(%d))" % (varname, map["IDENTITY"])) append("%s.update({" % varname) splits = 1 del map["IDENTITY"] identity = 1 else: append("%s = {" % varname) splits = 0 identity = 0 mappings = map.items() mappings.sort() i = 0 for mapkey, mapvalue in mappings: mapcomment = '' if isinstance(mapkey, tuple): (mapkey, mapcomment) = mapkey if isinstance(mapvalue, tuple): (mapvalue, mapcomment) = mapvalue if mapkey is None: continue if (identity and mapkey == mapvalue and mapkey < 256): # No need to include identity mappings, since these # are already set for the first 256 code points. continue key = hexrepr(mapkey) value = hexrepr(mapvalue) if mapcomment and comments: append(' %s: %s,\t# %s' % (key, value, mapcomment)) else: append(' %s: %s,' % (key, value)) i += 1 if i == 4096: # Split the definition into parts so that the Python # parser doesn't dump core if splits == 0: append('}') else: append('})') append('%s.update({' % varname) i = 0 splits = splits + 1 if splits == 0: append('}') else: append('})') return l def python_tabledef_code(varname, map, comments=1): l = [] append = l.append append('%s = (' % varname) # Analyze map and create table dict mappings = map.items() mappings.sort() table = {} maxkey = 0 if map.has_key('IDENTITY'): for key in range(256): table[key] = (key, '') maxkey = 255 del map['IDENTITY'] for mapkey, mapvalue in mappings: mapcomment = '' if isinstance(mapkey, tuple): (mapkey, mapcomment) = mapkey if isinstance(mapvalue, tuple): (mapvalue, mapcomment) = mapvalue if mapkey is None: continue table[mapkey] = (mapvalue, mapcomment) if mapkey > maxkey: maxkey = mapkey if maxkey > MAX_TABLE_SIZE: # Table too large return None #
Create table code for key in range(maxkey + 1): if key not in table: mapvalue = None mapcomment = 'UNDEFINED' else: mapvalue, mapcomment = table[key] if mapvalue is None: mapchar = UNI_UNDEFINED else: if isinstance(mapvalue, tuple): # 1-n mappings not supported return None else: mapchar = unichr(mapvalue) if mapcomment and comments: append(' %r\t# %s -> %s' % (mapchar, hexrepr(key), mapcomment)) else: append(' %r' % mapchar) append(')') return l def codegen(name, map, comments=1): """ Returns Python source for the given map. Comments are included in the source, if comments is true (default). """ # Generate code decoding_map_code = python_mapdef_code( 'decoding_map', map, comments=comments) decoding_table_code = python_tabledef_code( 'decoding_table', map, comments=comments) encoding_map_code = python_mapdef_code( 'encoding_map', codecs.make_encoding_map(map), comments=comments) l = [ '''\ """ Python Character Mapping Codec generated from '%s' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): ''' % name ] if decoding_table_code: l.append('''\ return codecs.charmap_decode(input,errors,decoding_table)''') else: l.append('''\ return codecs.charmap_decode(input,errors,decoding_map)''') l.append(''' class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map ''') l.extend(decoding_map_code) # Add optional decoding table if decoding_table_code: l.append(''' ### Decoding Table ''') l.extend(decoding_table_code) l.append(''' ### Encoding Map ''') l.extend(encoding_map_code) return '\n'.join(l) def pymap(name,map,pyfile,comments=1): code = codegen(name,map,comments) f = open(pyfile,'w') f.write(code) f.close() def 
marshalmap(name,map,marshalfile): d = {} for e,(u,c) in map.items(): d[e] = (u,c) f = open(marshalfile,'wb') marshal.dump(d,f) f.close() def convertdir(dir,prefix='',comments=1): mapnames = os.listdir(dir) for mapname in mapnames: mappathname = os.path.join(dir, mapname) name = os.path.split(mapname)[1] name = name.replace('-','_') name = name.split('.')[0] name = name.lower() codefile = name + '.py' marshalfile = name + '.mapping' print 'converting %s to %s and %s' % (mapname, prefix + codefile, prefix + marshalfile) try: map = readmap(os.path.join(dir,mapname)) if not map: print '* map is empty; skipping' else: pymap(mappathname, map, prefix + codefile,comments) marshalmap(mappathname, map, prefix + marshalfile) except ValueError, why: print '* conversion failed: %s' % why raise def rewritepythondir(dir,prefix='',comments=1): mapnames = os.listdir(dir) for mapname in mapnames: if not mapname.endswith('.mapping'): continue codefile = mapname[:-len('.mapping')] + '.py' print 'converting %s to %s' % (mapname, prefix + codefile) try: map = marshal.load(open(os.path.join(dir,mapname), 'rb')) if not map: print '* map is empty; skipping' else: pymap(mapname, map, prefix + codefile,comments) except ValueError, why: print '* conversion failed: %s' % why if __name__ == '__main__': import sys if 1: apply(convertdir,tuple(sys.argv[1:])) else: apply(rewritepythondir,tuple(sys.argv[1:])) From lemburg at users.sourceforge.net Fri Oct 21 15:47:06 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Fri, 21 Oct 2005 15:47:06 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/unicode comparecodecs.py, NONE, 1.1 listcodecs.py, NONE, 1.1 Message-ID: <20051021134706.44FFD1E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Tools/unicode In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14104 Added Files: comparecodecs.py listcodecs.py Log Message: Add two new tools to compare codecs and show differences and to list all installed 
codecs. --- NEW FILE: comparecodecs.py --- #!/usr/bin/env python """ Compare the output of two codecs. (c) Copyright 2005, Marc-Andre Lemburg (mal at lemburg.com). Licensed to PSF under a Contributor Agreement. """ import sys def compare_codecs(encoding1, encoding2): print 'Comparing encoding/decoding of %r and %r' % (encoding1, encoding2) mismatch = 0 # Check encoding for i in range(sys.maxunicode): u = unichr(i) try: c1 = u.encode(encoding1) except UnicodeError, reason: c1 = '' try: c2 = u.encode(encoding2) except UnicodeError, reason: c2 = '' if c1 != c2: print ' * encoding mismatch for 0x%04X: %-14r != %r' % \ (i, c1, c2) mismatch += 1 # Check decoding for i in range(256): c = chr(i) try: u1 = c.decode(encoding1) except UnicodeError: u1 = u'' try: u2 = c.decode(encoding2) except UnicodeError: u2 = u'' if u1 != u2: print ' * decoding mismatch for 0x%04X: %-14r != %r' % \ (i, u1, u2) mismatch += 1 if mismatch: print print 'Found %i mismatches' % mismatch else: print '-> Codecs are identical.' if __name__ == '__main__': compare_codecs(sys.argv[1], sys.argv[2]) --- NEW FILE: listcodecs.py --- """ List all available codec modules. (c) Copyright 2005, Marc-Andre Lemburg (mal at lemburg.com). Licensed to PSF under a Contributor Agreement. 
""" import os, codecs, encodings _debug = 0 def listcodecs(dir): names = [] for filename in os.listdir(dir): if filename[-3:] != '.py': continue name = filename[:-3] # Check whether we've found a true codec try: codecs.lookup(name) except LookupError: # Codec not found continue except Exception, reason: # Probably an error from importing the codec; still it's # a valid code name if _debug: print '* problem importing codec %r: %s' % \ (name, reason) names.append(name) return names if __name__ == '__main__': names = listcodecs(encodings.__path__[0]) names.sort() print 'all_codecs = [' for name in names: print ' %r,' % name print ']' From lemburg at users.sourceforge.net Fri Oct 21 15:49:26 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Fri, 21 Oct 2005 15:49:26 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings cp1006.py, 1.4, 1.5 cp1250.py, 1.4, 1.5 cp1251.py, 1.4, 1.5 cp1252.py, 1.4, 1.5 cp1253.py, 1.4, 1.5 cp1254.py, 1.4, 1.5 cp1255.py, 1.4, 1.5 cp1256.py, 1.4, 1.5 cp1257.py, 1.4, 1.5 cp1258.py, 1.4, 1.5 cp424.py, 1.4, 1.5 cp437.py, 1.4, 1.5 cp737.py, 1.4, 1.5 cp775.py, 1.4, 1.5 cp850.py, 1.4, 1.5 cp852.py, 1.4, 1.5 cp855.py, 1.4, 1.5 cp856.py, 1.5, 1.6 cp857.py, 1.4, 1.5 cp860.py, 1.4, 1.5 cp861.py, 1.4, 1.5 cp862.py, 1.4, 1.5 cp863.py, 1.4, 1.5 cp864.py, 1.4, 1.5 cp865.py, 1.4, 1.5 cp866.py, 1.4, 1.5 cp869.py, 1.4, 1.5 cp874.py, 1.4, 1.5 iso8859_1.py, 1.4, 1.5 iso8859_10.py, 1.4, 1.5 iso8859_11.py, 1.2, 1.3 iso8859_13.py, 1.4, 1.5 iso8859_14.py, 1.4, 1.5 iso8859_15.py, 1.4, 1.5 iso8859_16.py, 1.2, 1.3 iso8859_2.py, 1.4, 1.5 iso8859_3.py, 1.4, 1.5 iso8859_4.py, 1.4, 1.5 iso8859_5.py, 1.4, 1.5 iso8859_6.py, 1.4, 1.5 iso8859_7.py, 1.4, 1.5 iso8859_8.py, 1.4, 1.5 iso8859_9.py, 1.4, 1.5 koi8_r.py, 1.4, 1.5 mac_cyrillic.py, 1.4, 1.5 mac_greek.py, 1.4, 1.5 mac_iceland.py, 1.4, 1.5 mac_roman.py, 1.4, 1.5 mac_turkish.py, 1.4, 1.5 Message-ID: <20051021134926.376271E4004@bag.python.org> Update of 
/cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14602 Modified Files: cp1006.py cp1250.py cp1251.py cp1252.py cp1253.py cp1254.py cp1255.py cp1256.py cp1257.py cp1258.py cp424.py cp437.py cp737.py cp775.py cp850.py cp852.py cp855.py cp856.py cp857.py cp860.py cp861.py cp862.py cp863.py cp864.py cp865.py cp866.py cp869.py cp874.py iso8859_1.py iso8859_10.py iso8859_11.py iso8859_13.py iso8859_14.py iso8859_15.py iso8859_16.py iso8859_2.py iso8859_3.py iso8859_4.py iso8859_5.py iso8859_6.py iso8859_7.py iso8859_8.py iso8859_9.py koi8_r.py mac_cyrillic.py mac_greek.py mac_iceland.py mac_roman.py mac_turkish.py Log Message: Replace the old charmap codecs with new ones generated from the current mapping tables available at ftp.unicode.org. These new codecs include and use character decoding tables, which speeds up decoding several-fold. Index: cp1006.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1006.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1006.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1006.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP1006.TXT' with gencodec.py.
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,102 +32,619 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO - 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE - 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO - 0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE - 0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR - 0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE - 0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX - 0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE - 0x00ab: 0x060c, # ARABIC COMMA - 0x00ac: 0x061b, # ARABIC SEMICOLON - 0x00ae: 0x061f, # ARABIC QUESTION MARK - 0x00af: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM - 0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM - 0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM - 0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM - 0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM - 0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM - 0x00c2: 
0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORMN - 0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED FORM - 0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM - 0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d5: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00de: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM - 0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM - 0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00e8: 0xfedf, # ARABIC LETTER 
LAM INITIAL FORM - 0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM - 0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM - 0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM - 0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM - 0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM - 0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - 0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - 0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - 0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM - 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM - 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO + 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE + 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO + 0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE + 0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR + 0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE + 0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX + 0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE + 0x00ab: 0x060c, # ARABIC COMMA + 0x00ac: 0x061b, # ARABIC SEMICOLON + 0x00ae: 0x061f, # ARABIC QUESTION MARK + 0x00af: 0xfe81, # ARABIC 
LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM + 0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM + 0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM + 0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM + 0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM + 0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM + 0x00c2: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORMN + 0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED FORM + 0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM + 0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM + 0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d5: 
0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM + 0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00de: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM + 0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM + 0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM + 0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00e8: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM + 0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM + 0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM + 0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM + 0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM + 0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM + 0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + 0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + 0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + 0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED 
FORM + 0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM + 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM + 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u06f0' # 0x00a1 -> EXTENDED ARABIC-INDIC DIGIT ZERO + u'\u06f1' # 0x00a2 -> EXTENDED ARABIC-INDIC DIGIT ONE + u'\u06f2' # 0x00a3 -> EXTENDED ARABIC-INDIC DIGIT TWO + 
u'\u06f3' # 0x00a4 -> EXTENDED ARABIC-INDIC DIGIT THREE + u'\u06f4' # 0x00a5 -> EXTENDED ARABIC-INDIC DIGIT FOUR + u'\u06f5' # 0x00a6 -> EXTENDED ARABIC-INDIC DIGIT FIVE + u'\u06f6' # 0x00a7 -> EXTENDED ARABIC-INDIC DIGIT SIX + u'\u06f7' # 0x00a8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN + u'\u06f8' # 0x00a9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT + u'\u06f9' # 0x00aa -> EXTENDED ARABIC-INDIC DIGIT NINE + u'\u060c' # 0x00ab -> ARABIC COMMA + u'\u061b' # 0x00ac -> ARABIC SEMICOLON + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u061f' # 0x00ae -> ARABIC QUESTION MARK + u'\ufe81' # 0x00af -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe8d' # 0x00b0 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe8e' # 0x00b1 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8e' # 0x00b2 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0x00b3 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe91' # 0x00b4 -> ARABIC LETTER BEH INITIAL FORM + u'\ufb56' # 0x00b5 -> ARABIC LETTER PEH ISOLATED FORM + u'\ufb58' # 0x00b6 -> ARABIC LETTER PEH INITIAL FORM + u'\ufe93' # 0x00b7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe95' # 0x00b8 -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe97' # 0x00b9 -> ARABIC LETTER TEH INITIAL FORM + u'\ufb66' # 0x00ba -> ARABIC LETTER TTEH ISOLATED FORM + u'\ufb68' # 0x00bb -> ARABIC LETTER TTEH INITIAL FORM + u'\ufe99' # 0x00bc -> ARABIC LETTER THEH ISOLATED FORM + u'\ufe9b' # 0x00bd -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9d' # 0x00be -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufe9f' # 0x00bf -> ARABIC LETTER JEEM INITIAL FORM + u'\ufb7a' # 0x00c0 -> ARABIC LETTER TCHEH ISOLATED FORM + u'\ufb7c' # 0x00c1 -> ARABIC LETTER TCHEH INITIAL FORM + u'\ufea1' # 0x00c2 -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea3' # 0x00c3 -> ARABIC LETTER HAH INITIAL FORM + u'\ufea5' # 0x00c4 -> ARABIC LETTER KHAH ISOLATED FORM + u'\ufea7' # 0x00c5 -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0x00c6 -> ARABIC LETTER DAL ISOLATED FORM + u'\ufb84' # 0x00c7 -> ARABIC LETTER DAHAL ISOLATED FORMN + u'\ufeab' # 
0x00c8 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0x00c9 -> ARABIC LETTER REH ISOLATED FORM + u'\ufb8c' # 0x00ca -> ARABIC LETTER RREH ISOLATED FORM + u'\ufeaf' # 0x00cb -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufb8a' # 0x00cc -> ARABIC LETTER JEH ISOLATED FORM + u'\ufeb1' # 0x00cd -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb3' # 0x00ce -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb5' # 0x00cf -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb7' # 0x00d0 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufeb9' # 0x00d1 -> ARABIC LETTER SAD ISOLATED FORM + u'\ufebb' # 0x00d2 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebd' # 0x00d3 -> ARABIC LETTER DAD ISOLATED FORM + u'\ufebf' # 0x00d4 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0x00d5 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0x00d6 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufec9' # 0x00d7 -> ARABIC LETTER AIN ISOLATED FORM + u'\ufeca' # 0x00d8 -> ARABIC LETTER AIN FINAL FORM + u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecc' # 0x00da -> ARABIC LETTER AIN MEDIAL FORM + u'\ufecd' # 0x00db -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufece' # 0x00dc -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecf' # 0x00dd -> ARABIC LETTER GHAIN INITIAL FORM + u'\ufed0' # 0x00de -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed1' # 0x00df -> ARABIC LETTER FEH ISOLATED FORM + u'\ufed3' # 0x00e0 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed5' # 0x00e1 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufed9' # 0x00e3 -> ARABIC LETTER KAF ISOLATED FORM + u'\ufedb' # 0x00e4 -> ARABIC LETTER KAF INITIAL FORM + u'\ufb92' # 0x00e5 -> ARABIC LETTER GAF ISOLATED FORM + u'\ufb94' # 0x00e6 -> ARABIC LETTER GAF INITIAL FORM + u'\ufedd' # 0x00e7 -> ARABIC LETTER LAM ISOLATED FORM + u'\ufedf' # 0x00e8 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee0' # 0x00e9 -> ARABIC LETTER LAM MEDIAL FORM + u'\ufee1' # 0x00ea -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufee3' # 0x00eb -> ARABIC LETTER MEEM INITIAL FORM + 
u'\ufb9e' # 0x00ec -> ARABIC LETTER NOON GHUNNA ISOLATED FORM + u'\ufee5' # 0x00ed -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee7' # 0x00ee -> ARABIC LETTER NOON INITIAL FORM + u'\ufe85' # 0x00ef -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeed' # 0x00f0 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufba6' # 0x00f1 -> ARABIC LETTER HEH GOAL ISOLATED FORM + u'\ufba8' # 0x00f2 -> ARABIC LETTER HEH GOAL INITIAL FORM + u'\ufba9' # 0x00f3 -> ARABIC LETTER HEH GOAL MEDIAL FORM + u'\ufbaa' # 0x00f4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + u'\ufe80' # 0x00f5 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe89' # 0x00f6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + u'\ufe8a' # 0x00f7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + u'\ufe8b' # 0x00f8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufef1' # 0x00f9 -> ARABIC LETTER YEH ISOLATED FORM + u'\ufef2' # 0x00fa -> ARABIC LETTER YEH FINAL FORM + u'\ufef3' # 0x00fb -> ARABIC LETTER YEH INITIAL FORM + u'\ufbb0' # 0x00fc -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + u'\ufbae' # 0x00fd -> ARABIC LETTER YEH BARREE ISOLATED FORM + u'\ufe7c' # 0x00fe -> ARABIC SHADDA ISOLATED FORM + u'\ufe7d' # 0x00ff -> ARABIC SHADDA MEDIAL FORM +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL 
THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 
0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 
0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x060c: 0x00ab, # ARABIC COMMA + 0x061b: 0x00ac, # ARABIC SEMICOLON + 0x061f: 0x00ae, # ARABIC QUESTION MARK + 0x06f0: 0x00a1, # EXTENDED ARABIC-INDIC DIGIT ZERO + 0x06f1: 0x00a2, # EXTENDED ARABIC-INDIC DIGIT ONE + 0x06f2: 0x00a3, # EXTENDED ARABIC-INDIC DIGIT TWO + 0x06f3: 0x00a4, # EXTENDED ARABIC-INDIC DIGIT THREE + 0x06f4: 0x00a5, # EXTENDED ARABIC-INDIC DIGIT FOUR + 0x06f5: 0x00a6, # EXTENDED ARABIC-INDIC DIGIT FIVE + 0x06f6: 0x00a7, # EXTENDED ARABIC-INDIC DIGIT SIX + 0x06f7: 0x00a8, # EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x06f8: 0x00a9, # EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x06f9: 0x00aa, # EXTENDED ARABIC-INDIC DIGIT NINE + 0xfb56: 0x00b5, # ARABIC LETTER PEH ISOLATED FORM + 0xfb58: 0x00b6, # ARABIC LETTER PEH INITIAL FORM + 0xfb66: 0x00ba, # ARABIC LETTER TTEH ISOLATED FORM + 0xfb68: 0x00bb, # ARABIC LETTER TTEH INITIAL FORM + 0xfb7a: 0x00c0, # ARABIC LETTER TCHEH ISOLATED FORM + 0xfb7c: 0x00c1, # ARABIC LETTER TCHEH INITIAL FORM + 0xfb84: 0x00c7, # ARABIC LETTER DAHAL ISOLATED FORMN + 0xfb8a: 0x00cc, # ARABIC LETTER JEH ISOLATED FORM + 0xfb8c: 0x00ca, # ARABIC LETTER RREH ISOLATED FORM + 0xfb92: 0x00e5, # ARABIC LETTER GAF ISOLATED FORM + 0xfb94: 0x00e6, # ARABIC LETTER GAF INITIAL FORM + 0xfb9e: 0x00ec, # 
ARABIC LETTER NOON GHUNNA ISOLATED FORM + 0xfba6: 0x00f1, # ARABIC LETTER HEH GOAL ISOLATED FORM + 0xfba8: 0x00f2, # ARABIC LETTER HEH GOAL INITIAL FORM + 0xfba9: 0x00f3, # ARABIC LETTER HEH GOAL MEDIAL FORM + 0xfbaa: 0x00f4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + 0xfbae: 0x00fd, # ARABIC LETTER YEH BARREE ISOLATED FORM + 0xfbb0: 0x00fc, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + 0xfe7c: 0x00fe, # ARABIC SHADDA ISOLATED FORM + 0xfe7d: 0x00ff, # ARABIC SHADDA MEDIAL FORM + 0xfe80: 0x00f5, # ARABIC LETTER HAMZA ISOLATED FORM + 0xfe81: 0x00af, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfe85: 0x00ef, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xfe89: 0x00f6, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + 0xfe8a: 0x00f7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + 0xfe8b: 0x00f8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xfe8d: 0x00b0, # ARABIC LETTER ALEF ISOLATED FORM + 0xfe8e: None, # ARABIC LETTER ALEF FINAL FORM + 0xfe8f: 0x00b3, # ARABIC LETTER BEH ISOLATED FORM + 0xfe91: 0x00b4, # ARABIC LETTER BEH INITIAL FORM + 0xfe93: 0x00b7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xfe95: 0x00b8, # ARABIC LETTER TEH ISOLATED FORM + 0xfe97: 0x00b9, # ARABIC LETTER TEH INITIAL FORM + 0xfe99: 0x00bc, # ARABIC LETTER THEH ISOLATED FORM + 0xfe9b: 0x00bd, # ARABIC LETTER THEH INITIAL FORM + 0xfe9d: 0x00be, # ARABIC LETTER JEEM ISOLATED FORM + 0xfe9f: 0x00bf, # ARABIC LETTER JEEM INITIAL FORM + 0xfea1: 0x00c2, # ARABIC LETTER HAH ISOLATED FORM + 0xfea3: 0x00c3, # ARABIC LETTER HAH INITIAL FORM + 0xfea5: 0x00c4, # ARABIC LETTER KHAH ISOLATED FORM + 0xfea7: 0x00c5, # ARABIC LETTER KHAH INITIAL FORM + 0xfea9: 0x00c6, # ARABIC LETTER DAL ISOLATED FORM + 0xfeab: 0x00c8, # ARABIC LETTER THAL ISOLATED FORM + 0xfead: 0x00c9, # ARABIC LETTER REH ISOLATED FORM + 0xfeaf: 0x00cb, # ARABIC LETTER ZAIN ISOLATED FORM + 0xfeb1: 0x00cd, # ARABIC LETTER SEEN ISOLATED FORM + 0xfeb3: 0x00ce, # ARABIC LETTER SEEN INITIAL FORM + 
0xfeb5: 0x00cf, # ARABIC LETTER SHEEN ISOLATED FORM + 0xfeb7: 0x00d0, # ARABIC LETTER SHEEN INITIAL FORM + 0xfeb9: 0x00d1, # ARABIC LETTER SAD ISOLATED FORM + 0xfebb: 0x00d2, # ARABIC LETTER SAD INITIAL FORM + 0xfebd: 0x00d3, # ARABIC LETTER DAD ISOLATED FORM + 0xfebf: 0x00d4, # ARABIC LETTER DAD INITIAL FORM + 0xfec1: 0x00d5, # ARABIC LETTER TAH ISOLATED FORM + 0xfec5: 0x00d6, # ARABIC LETTER ZAH ISOLATED FORM + 0xfec9: 0x00d7, # ARABIC LETTER AIN ISOLATED FORM + 0xfeca: 0x00d8, # ARABIC LETTER AIN FINAL FORM + 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM + 0xfecc: 0x00da, # ARABIC LETTER AIN MEDIAL FORM + 0xfecd: 0x00db, # ARABIC LETTER GHAIN ISOLATED FORM + 0xfece: 0x00dc, # ARABIC LETTER GHAIN FINAL FORM + 0xfecf: 0x00dd, # ARABIC LETTER GHAIN INITIAL FORM + 0xfed0: 0x00de, # ARABIC LETTER GHAIN MEDIAL FORM + 0xfed1: 0x00df, # ARABIC LETTER FEH ISOLATED FORM + 0xfed3: 0x00e0, # ARABIC LETTER FEH INITIAL FORM + 0xfed5: 0x00e1, # ARABIC LETTER QAF ISOLATED FORM + 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM + 0xfed9: 0x00e3, # ARABIC LETTER KAF ISOLATED FORM + 0xfedb: 0x00e4, # ARABIC LETTER KAF INITIAL FORM + 0xfedd: 0x00e7, # ARABIC LETTER LAM ISOLATED FORM + 0xfedf: 0x00e8, # ARABIC LETTER LAM INITIAL FORM + 0xfee0: 0x00e9, # ARABIC LETTER LAM MEDIAL FORM + 0xfee1: 0x00ea, # ARABIC LETTER MEEM ISOLATED FORM + 0xfee3: 0x00eb, # ARABIC LETTER MEEM INITIAL FORM + 0xfee5: 0x00ed, # ARABIC LETTER NOON ISOLATED FORM + 0xfee7: 0x00ee, # ARABIC LETTER NOON INITIAL FORM + 0xfeed: 0x00f0, # ARABIC LETTER WAW ISOLATED FORM + 0xfef1: 0x00f9, # ARABIC LETTER YEH ISOLATED FORM + 0xfef2: 0x00fa, # ARABIC LETTER YEH FINAL FORM + 0xfef3: 0x00fb, # ARABIC LETTER YEH INITIAL FORM +} \ No newline at end of file Index: cp1250.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1250.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1250.py 8 Aug 2002 20:19:18 -0000 
1.4 +++ cp1250.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,87 +32,600 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: None, # UNDEFINED - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON - 
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a1: 0x02c7, # CARON - 0x00a2: 0x02d8, # BREVE - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x011b, # LATIN SMALL 
LETTER E WITH CARON - 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ff: 0x02d9, # DOT ABOVE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: None, # UNDEFINED + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a1: 0x02c7, # CARON + 0x00a2: 0x02d8, # BREVE + 0x00a3: 0x0141, # LATIN CAPITAL LETTER 
L WITH STROKE + 0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0148, 
# LATIN SMALL LETTER N WITH CARON + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x0083 -> UNDEFINED + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\ufffe' # 0x0088 -> UNDEFINED + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0160' # 0x008a -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u015a' # 0x008c -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0164' # 0x008d -> LATIN CAPITAL LETTER T WITH CARON + u'\u017d' # 0x008e -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0179' # 0x008f -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT 
DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0161' # 0x009a -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u015b' # 0x009c -> LATIN SMALL LETTER S WITH ACUTE + u'\u0165' # 0x009d -> LATIN SMALL LETTER T WITH CARON + u'\u017e' # 0x009e -> LATIN SMALL LETTER Z WITH CARON + u'\u017a' # 0x009f -> LATIN SMALL LETTER Z WITH ACUTE + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u02c7' # 0x00a1 -> CARON + u'\u02d8' # 0x00a2 -> BREVE + u'\u0141' # 0x00a3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u0104' # 0x00a5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u015e' # 0x00aa -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u02db' # 0x00b2 -> OGONEK + u'\u0142' # 0x00b3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\u0105' # 0x00b9 -> LATIN SMALL LETTER A WITH OGONEK + u'\u015f' # 0x00ba -> LATIN SMALL LETTER S WITH CEDILLA + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u013d' # 0x00bc -> LATIN CAPITAL LETTER L WITH CARON + u'\u02dd' # 0x00bd -> DOUBLE ACUTE ACCENT + u'\u013e' # 0x00be -> LATIN SMALL LETTER L WITH CARON + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + 
u'\u0154' # 0x00c0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0x00c5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0x00c6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0x00cc -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0x00cf -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0x00d5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0158' # 0x00d8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0x00d9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0x00db -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0x00de -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0x00e0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN 
SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0x00e5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0x00e6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0x00ec -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0x00ef -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00f2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0x00f5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0159' # 0x00f8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0x00f9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0x00fe -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 
0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN 
CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER 
S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00e1, # 
LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b9, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e6, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00cf, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00ef, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00ca, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00cc, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00ec, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0x00c5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x00e5, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x00bc, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x00be, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0x00a3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00b3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH 
ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00f2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x00d5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x00f5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00c0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00e0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0x00d8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00f8, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x008c, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x009c, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00aa, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ba, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x008a, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x009a, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00de, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00fe, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x008d, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x009d, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00d9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x00f9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0x00db, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x008f, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x009f, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x008e, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x009e, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00a1, # CARON + 0x02d8: 0x00a2, # BREVE + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x00b2, # OGONEK + 0x02dd: 0x00bd, # DOUBLE ACUTE ACCENT + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 
0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1251.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1251.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1251.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1251.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,121 +32,638 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x20ac, # EURO SIGN - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00aa: 0x0404, # 
CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00b9: 0x2116, # NUMERO SIGN - 0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00dd: 0x042d, 
# CYRILLIC CAPITAL LETTER E - 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA + 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x20ac, # EURO SIGN + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 
0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00aa: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00b9: 0x2116, # NUMERO SIGN + 0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00c3: 0x0413, 
# CYRILLIC CAPITAL LETTER GHE + 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ed: 0x043d, # CYRILLIC SMALL 
LETTER EN + 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O + 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E + 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD 
SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 
-> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0402' # 0x0080 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0x0081 -> CYRILLIC CAPITAL LETTER GJE + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0453' # 0x0083 -> CYRILLIC SMALL LETTER GJE + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u20ac' # 0x0088 -> EURO SIGN + u'\u2030' # 0x0089 -> 
PER MILLE SIGN + u'\u0409' # 0x008a -> CYRILLIC CAPITAL LETTER LJE + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u040a' # 0x008c -> CYRILLIC CAPITAL LETTER NJE + u'\u040c' # 0x008d -> CYRILLIC CAPITAL LETTER KJE + u'\u040b' # 0x008e -> CYRILLIC CAPITAL LETTER TSHE + u'\u040f' # 0x008f -> CYRILLIC CAPITAL LETTER DZHE + u'\u0452' # 0x0090 -> CYRILLIC SMALL LETTER DJE + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0459' # 0x009a -> CYRILLIC SMALL LETTER LJE + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u045a' # 0x009c -> CYRILLIC SMALL LETTER NJE + u'\u045c' # 0x009d -> CYRILLIC SMALL LETTER KJE + u'\u045b' # 0x009e -> CYRILLIC SMALL LETTER TSHE + u'\u045f' # 0x009f -> CYRILLIC SMALL LETTER DZHE + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u040e' # 0x00a1 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00a2 -> CYRILLIC SMALL LETTER SHORT U + u'\u0408' # 0x00a3 -> CYRILLIC CAPITAL LETTER JE + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u0490' # 0x00a5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u0401' # 0x00a8 -> CYRILLIC CAPITAL LETTER IO + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u0404' # 0x00aa -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u0407' # 0x00af -> CYRILLIC CAPITAL LETTER YI + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u0406' # 0x00b2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0456' # 0x00b3 -> CYRILLIC 
SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0491' # 0x00b4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u0451' # 0x00b8 -> CYRILLIC SMALL LETTER IO + u'\u2116' # 0x00b9 -> NUMERO SIGN + u'\u0454' # 0x00ba -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0458' # 0x00bc -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0x00bd -> CYRILLIC CAPITAL LETTER DZE + u'\u0455' # 0x00be -> CYRILLIC SMALL LETTER DZE + u'\u0457' # 0x00bf -> CYRILLIC SMALL LETTER YI + u'\u0410' # 0x00c0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x00c1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x00c2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x00c3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x00c4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x00c5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x00c6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x00c7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x00c8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x00c9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x00ca -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x00cb -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x00cc -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x00cd -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x00ce -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x00cf -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x00d0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x00d1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x00d2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x00d3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x00d4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x00d5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x00d6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x00d7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x00d8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x00d9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x00da -> CYRILLIC CAPITAL LETTER HARD 
SIGN + u'\u042b' # 0x00db -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x00dc -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x00dd -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x00de -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x00df -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00e0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00e1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00e2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00e3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00e4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00e5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00e6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00e7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00e8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00e9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00ea -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00eb -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ec -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ed -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ee -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00ef -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0x00f0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00f1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00f2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00f3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00f4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00f5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00f6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00f7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00f8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00fa -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00fb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00fc -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00fd -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00fe -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ff -> CYRILLIC SMALL LETTER YA +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # 
START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 
0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN 
SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0x00a8, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x0080, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x0081, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x00aa, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x00bd, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x00b2, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x00af, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x00a3, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x008a, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x008c, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x008e, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x008d, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x00a1, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x008f, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00c0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00c1, # 
CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00c2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00c3, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00c4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00c5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00c6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00c7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00c8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00c9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00ca, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00cb, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00cc, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00cd, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00ce, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00cf, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00d0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00d1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00d2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00d3, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00d4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00d5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00d6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00d7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00d8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00d9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x00da, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00db, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00dc, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00dd, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x00de, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00df, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00e0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00e1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00e2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00e3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00e4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00e5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00e7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00e8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00e9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00ea, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00eb, # CYRILLIC SMALL 
LETTER EL + 0x043c: 0x00ec, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ed, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ee, # CYRILLIC SMALL LETTER O + 0x043f: 0x00ef, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00f0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00f1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00f2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00f3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00f4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00f5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00f6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00f7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00fa, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00fb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00fc, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00fd, # CYRILLIC SMALL LETTER E + 0x044e: 0x00fe, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00ff, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00b8, # CYRILLIC SMALL LETTER IO + 0x0452: 0x0090, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x0083, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x00ba, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x00be, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x00b3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x00bf, # CYRILLIC SMALL LETTER YI + 0x0458: 0x00bc, # CYRILLIC SMALL LETTER JE + 0x0459: 0x009a, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x009c, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x009e, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x009d, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x00a2, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x009f, # CYRILLIC SMALL LETTER DZHE + 0x0490: 0x00a5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0x00b4, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT 
DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0088, # EURO SIGN + 0x2116: 0x00b9, # NUMERO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1252.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1252.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1252.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1252.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,40 +32,553 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # 
MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM 
+ u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0160' # 0x008a -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x008d -> UNDEFINED + u'\u017d' # 0x008e -> LATIN CAPITAL LETTER Z WITH CARON + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0161' # 0x009a -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x009d -> UNDEFINED + u'\u017e' # 0x009e -> LATIN SMALL LETTER Z WITH CARON + u'\u0178' # 
0x009f -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + 
u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + 
u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # 
RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER 
U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN 
+ 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL 
LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x0160: 0x008a, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x009a, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x009f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x017d: 0x008e, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x009e, # LATIN SMALL LETTER Z WITH CARON + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1253.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1253.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1253.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1253.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). 
- -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,115 +32,616 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a1: 0x0385, # GREEK DIALYTIKA TONOS - 0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00aa: None, # UNDEFINED - 0x00af: 0x2015, # HORIZONTAL BAR - 0x00b4: 0x0384, # GREEK TONOS - 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ba: 
0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU - 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU - 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI - 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00d2: None, # UNDEFINED - 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e4: 
0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00ec: 0x03bc, # GREEK SMALL LETTER MU - 0x00ed: 0x03bd, # GREEK SMALL LETTER NU - 0x00ee: 0x03be, # GREEK SMALL LETTER XI - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, 
# LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a1: 0x0385, # GREEK DIALYTIKA TONOS + 0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00aa: None, # UNDEFINED + 0x00af: 0x2015, # HORIZONTAL BAR + 0x00b4: 0x0384, # GREEK TONOS + 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU + 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU + 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI + 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, # UNDEFINED + 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 
0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ec: 0x03bc, # GREEK SMALL LETTER MU + 0x00ed: 0x03bd, # GREEK SMALL LETTER NU + 0x00ee: 0x03be, # GREEK SMALL LETTER XI + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH 
TONOS + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\ufffe' # 0x0088 -> UNDEFINED + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> 
BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0385' # 0x00a1 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0x00a2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\ufffe' # 0x00aa -> UNDEFINED + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u2015' # 0x00af -> HORIZONTAL BAR + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u0384' # 0x00b4 -> GREEK TONOS + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u0388' # 0x00b8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00b9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ba -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0x00bc -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\u038e' # 0x00be -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00bf -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0x00c0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0x00c1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00c2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00c3 -> GREEK CAPITAL LETTER 
GAMMA + u'\u0394' # 0x00c4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00c5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00c6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00c7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x00c8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00c9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x00ca -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00cb -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00cc -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00cd -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x00ce -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00cf -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x00d0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00d1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' # 0x00d2 -> UNDEFINED + u'\u03a3' # 0x00d3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0x00da -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00db -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0x00dc -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00dd -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00de -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x00df -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0x00e0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0x00e1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00e2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00e3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x00e4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00e5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x00e6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00ea 
-> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00eb -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00ec -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00ed -> GREEK SMALL LETTER NU + u'\u03be' # 0x00ee -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00ef -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00f0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00f1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0x00f2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0x00f3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0x00f4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00f5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00f8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0x00f9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0x00fa -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0x00fc -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00fd -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0x00fe -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 
0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 
0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # 
CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x0384: 0x00b4, # GREEK TONOS + 0x0385: 0x00a1, # GREEK DIALYTIKA TONOS + 0x0386: 0x00a2, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00b8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00b9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ba, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00bc, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00be, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00bf, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00c0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00c1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00c2, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00c3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00c4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00c5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00c6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00c7, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00c8, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00c9, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00ca, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00cb, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00cc, # GREEK CAPITAL LETTER MU + 0x039d: 0x00cd, # GREEK CAPITAL LETTER NU + 0x039e: 0x00ce, # GREEK CAPITAL LETTER XI + 0x039f: 0x00cf, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00d0, # GREEK 
CAPITAL LETTER PI + 0x03a1: 0x00d1, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00d3, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d4, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d5, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d6, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d7, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d8, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d9, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00da, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00db, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00dc, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00dd, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00de, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00df, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00e0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00e1, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00e2, # GREEK SMALL LETTER BETA + 0x03b3: 0x00e3, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00e4, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00e5, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e6, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e7, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e8, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e9, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00ea, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00eb, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00ec, # GREEK SMALL LETTER MU + 0x03bd: 0x00ed, # GREEK SMALL LETTER NU + 0x03be: 0x00ee, # GREEK SMALL LETTER XI + 0x03bf: 0x00ef, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00f0, # GREEK SMALL LETTER PI + 0x03c1: 0x00f1, # GREEK SMALL LETTER RHO + 0x03c2: 0x00f2, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00f3, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00f4, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f5, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f6, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f7, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f8, # GREEK SMALL LETTER PSI + 0x03c9: 0x00f9, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00fa, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # 
GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00fc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00fd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fe, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2015: 0x00af, # HORIZONTAL BAR + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1254.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1254.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1254.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1254.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,46 +32,557 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL 
TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0160' # 0x008a -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0161' # 0x009a -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\u0178' # 0x009f -> LATIN CAPITAL LETTER Y WITH DIAERESIS + 
u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH 
GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0x00d0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0x00dd -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0x00de -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0x00f0 
-> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0x00fd -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0x00fe -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # 
RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER 
U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN 
+ 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN 
CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 
0x00d0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00f0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x00dd, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00fd, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x015e: 0x00de, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00fe, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x008a, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x009a, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x009f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1255.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1255.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1255.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1255.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. 
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,107 +32,602 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # UNDEFINED - 0x00a4: 0x20aa, # NEW SHEQEL SIGN - 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00ba: 0x00f7, # DIVISION SIGN - 0x00c0: 0x05b0, # HEBREW POINT SHEVA - 0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL - 0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH - 0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS - 0x00c4: 0x05b4, # HEBREW POINT HIRIQ - 0x00c5: 0x05b5, # HEBREW POINT TSERE - 0x00c6: 0x05b6, # HEBREW 
POINT SEGOL - 0x00c7: 0x05b7, # HEBREW POINT PATAH - 0x00c8: 0x05b8, # HEBREW POINT QAMATS - 0x00c9: 0x05b9, # HEBREW POINT HOLAM - 0x00ca: None, # UNDEFINED - 0x00cb: 0x05bb, # HEBREW POINT QUBUTS - 0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ - 0x00cd: 0x05bd, # HEBREW POINT METEG - 0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF - 0x00cf: 0x05bf, # HEBREW POINT RAFE - 0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ - 0x00d1: 0x05c1, # HEBREW POINT SHIN DOT - 0x00d2: 0x05c2, # HEBREW POINT SIN DOT - 0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ - 0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV - 0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD - 0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD - 0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH - 0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM - 0x00d9: None, # UNDEFINED - 0x00da: None, # UNDEFINED - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: None, # UNDEFINED - 0x00e0: 0x05d0, # HEBREW LETTER ALEF - 0x00e1: 0x05d1, # HEBREW LETTER BET - 0x00e2: 0x05d2, # HEBREW LETTER GIMEL - 0x00e3: 0x05d3, # HEBREW LETTER DALET - 0x00e4: 0x05d4, # HEBREW LETTER HE - 0x00e5: 0x05d5, # HEBREW LETTER VAV - 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN - 0x00e7: 0x05d7, # HEBREW LETTER HET - 0x00e8: 0x05d8, # HEBREW LETTER TET - 0x00e9: 0x05d9, # HEBREW LETTER YOD - 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF - 0x00eb: 0x05db, # HEBREW LETTER KAF - 0x00ec: 0x05dc, # HEBREW LETTER LAMED - 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM - 0x00ee: 0x05de, # HEBREW LETTER MEM - 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN - 0x00f0: 0x05e0, # HEBREW LETTER NUN - 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH - 0x00f2: 0x05e2, # HEBREW LETTER AYIN - 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE - 0x00f4: 0x05e4, # HEBREW LETTER PE - 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x00f6: 0x05e6, # HEBREW LETTER TSADI - 0x00f7: 0x05e7, # HEBREW LETTER QOF - 0x00f8: 0x05e8, # HEBREW LETTER RESH - 
0x00f9: 0x05e9, # HEBREW LETTER SHIN - 0x00fa: 0x05ea, # HEBREW LETTER TAV - 0x00fb: None, # UNDEFINED - 0x00fc: None, # UNDEFINED - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a4: 0x20aa, # NEW SHEQEL SIGN + 0x00aa: 0x00d7, # MULTIPLICATION SIGN + 0x00ba: 0x00f7, # DIVISION SIGN + 0x00c0: 0x05b0, # HEBREW POINT SHEVA + 0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL + 0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH + 0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS + 0x00c4: 0x05b4, # HEBREW POINT HIRIQ + 0x00c5: 0x05b5, # HEBREW POINT TSERE + 0x00c6: 0x05b6, # HEBREW POINT SEGOL + 0x00c7: 0x05b7, # HEBREW POINT PATAH + 0x00c8: 0x05b8, # HEBREW POINT QAMATS + 0x00c9: 0x05b9, # HEBREW POINT HOLAM + 0x00ca: None, # UNDEFINED + 0x00cb: 0x05bb, # HEBREW POINT QUBUTS + 
0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ + 0x00cd: 0x05bd, # HEBREW POINT METEG + 0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF + 0x00cf: 0x05bf, # HEBREW POINT RAFE + 0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ + 0x00d1: 0x05c1, # HEBREW POINT SHIN DOT + 0x00d2: 0x05c2, # HEBREW POINT SIN DOT + 0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ + 0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV + 0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD + 0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD + 0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH + 0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM + 0x00d9: None, # UNDEFINED + 0x00da: None, # UNDEFINED + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: None, # UNDEFINED + 0x00e0: 0x05d0, # HEBREW LETTER ALEF + 0x00e1: 0x05d1, # HEBREW LETTER BET + 0x00e2: 0x05d2, # HEBREW LETTER GIMEL + 0x00e3: 0x05d3, # HEBREW LETTER DALET + 0x00e4: 0x05d4, # HEBREW LETTER HE + 0x00e5: 0x05d5, # HEBREW LETTER VAV + 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN + 0x00e7: 0x05d7, # HEBREW LETTER HET + 0x00e8: 0x05d8, # HEBREW LETTER TET + 0x00e9: 0x05d9, # HEBREW LETTER YOD + 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF + 0x00eb: 0x05db, # HEBREW LETTER KAF + 0x00ec: 0x05dc, # HEBREW LETTER LAMED + 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM + 0x00ee: 0x05de, # HEBREW LETTER MEM + 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN + 0x00f0: 0x05e0, # HEBREW LETTER NUN + 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH + 0x00f2: 0x05e2, # HEBREW LETTER AYIN + 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE + 0x00f4: 0x05e4, # HEBREW LETTER PE + 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x00f6: 0x05e6, # HEBREW LETTER TSADI + 0x00f7: 0x05e7, # HEBREW LETTER QOF + 0x00f8: 0x05e8, # HEBREW LETTER RESH + 0x00f9: 0x05e9, # HEBREW LETTER SHIN + 0x00fa: 0x05ea, # HEBREW LETTER TAV + 0x00fb: None, # UNDEFINED + 0x00fc: None, # UNDEFINED + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT 
MARK + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + 
u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u20aa' # 0x00a4 -> NEW SHEQEL SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xd7' # 0x00aa -> MULTIPLICATION SIGN + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xf7' # 0x00ba -> DIVISION SIGN + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\u05b0' # 0x00c0 -> HEBREW POINT SHEVA + u'\u05b1' # 0x00c1 -> HEBREW POINT HATAF SEGOL + u'\u05b2' # 0x00c2 -> HEBREW POINT HATAF PATAH + u'\u05b3' # 0x00c3 -> HEBREW POINT HATAF QAMATS + u'\u05b4' # 0x00c4 -> HEBREW POINT HIRIQ + u'\u05b5' # 0x00c5 -> HEBREW POINT TSERE + u'\u05b6' # 0x00c6 -> HEBREW POINT SEGOL + u'\u05b7' # 0x00c7 -> 
HEBREW POINT PATAH + u'\u05b8' # 0x00c8 -> HEBREW POINT QAMATS + u'\u05b9' # 0x00c9 -> HEBREW POINT HOLAM + u'\ufffe' # 0x00ca -> UNDEFINED + u'\u05bb' # 0x00cb -> HEBREW POINT QUBUTS + u'\u05bc' # 0x00cc -> HEBREW POINT DAGESH OR MAPIQ + u'\u05bd' # 0x00cd -> HEBREW POINT METEG + u'\u05be' # 0x00ce -> HEBREW PUNCTUATION MAQAF + u'\u05bf' # 0x00cf -> HEBREW POINT RAFE + u'\u05c0' # 0x00d0 -> HEBREW PUNCTUATION PASEQ + u'\u05c1' # 0x00d1 -> HEBREW POINT SHIN DOT + u'\u05c2' # 0x00d2 -> HEBREW POINT SIN DOT + u'\u05c3' # 0x00d3 -> HEBREW PUNCTUATION SOF PASUQ + u'\u05f0' # 0x00d4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV + u'\u05f1' # 0x00d5 -> HEBREW LIGATURE YIDDISH VAV YOD + u'\u05f2' # 0x00d6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD + u'\u05f3' # 0x00d7 -> HEBREW PUNCTUATION GERESH + u'\u05f4' # 0x00d8 -> HEBREW PUNCTUATION GERSHAYIM + u'\ufffe' # 0x00d9 -> UNDEFINED + u'\ufffe' # 0x00da -> UNDEFINED + u'\ufffe' # 0x00db -> UNDEFINED + u'\ufffe' # 0x00dc -> UNDEFINED + u'\ufffe' # 0x00dd -> UNDEFINED + u'\ufffe' # 0x00de -> UNDEFINED + u'\ufffe' # 0x00df -> UNDEFINED + u'\u05d0' # 0x00e0 -> HEBREW LETTER ALEF + u'\u05d1' # 0x00e1 -> HEBREW LETTER BET + u'\u05d2' # 0x00e2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x00e3 -> HEBREW LETTER DALET + u'\u05d4' # 0x00e4 -> HEBREW LETTER HE + u'\u05d5' # 0x00e5 -> HEBREW LETTER VAV + u'\u05d6' # 0x00e6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x00e7 -> HEBREW LETTER HET + u'\u05d8' # 0x00e8 -> HEBREW LETTER TET + u'\u05d9' # 0x00e9 -> HEBREW LETTER YOD + u'\u05da' # 0x00ea -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x00eb -> HEBREW LETTER KAF + u'\u05dc' # 0x00ec -> HEBREW LETTER LAMED + u'\u05dd' # 0x00ed -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x00ee -> HEBREW LETTER MEM + u'\u05df' # 0x00ef -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x00f0 -> HEBREW LETTER NUN + u'\u05e1' # 0x00f1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x00f2 -> HEBREW LETTER AYIN + u'\u05e3' # 0x00f3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x00f4 -> HEBREW LETTER PE + 
u'\u05e5' # 0x00f5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x00f6 -> HEBREW LETTER TSADI + u'\u05e7' # 0x00f7 -> HEBREW LETTER QOF + u'\u05e8' # 0x00f8 -> HEBREW LETTER RESH + u'\u05e9' # 0x00f9 -> HEBREW LETTER SHIN + u'\u05ea' # 0x00fa -> HEBREW LETTER TAV + u'\ufffe' # 0x00fb -> UNDEFINED + u'\ufffe' # 0x00fc -> UNDEFINED + u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK + u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # 
ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 
0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 
0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00d7: 0x00aa, # MULTIPLICATION SIGN + 0x00f7: 0x00ba, # DIVISION SIGN + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x05b0: 0x00c0, # HEBREW POINT SHEVA + 0x05b1: 0x00c1, # HEBREW POINT HATAF SEGOL + 0x05b2: 0x00c2, # HEBREW POINT HATAF PATAH + 0x05b3: 0x00c3, # HEBREW POINT HATAF QAMATS + 0x05b4: 0x00c4, # HEBREW POINT HIRIQ + 0x05b5: 0x00c5, # HEBREW POINT TSERE + 0x05b6: 0x00c6, # HEBREW POINT SEGOL + 0x05b7: 0x00c7, # HEBREW POINT PATAH + 0x05b8: 0x00c8, # HEBREW POINT QAMATS + 0x05b9: 0x00c9, # HEBREW POINT HOLAM + 0x05bb: 0x00cb, # HEBREW POINT QUBUTS + 0x05bc: 0x00cc, # HEBREW POINT DAGESH OR MAPIQ + 0x05bd: 0x00cd, # HEBREW POINT METEG + 0x05be: 0x00ce, # HEBREW PUNCTUATION MAQAF + 0x05bf: 0x00cf, # HEBREW POINT RAFE + 0x05c0: 0x00d0, # HEBREW PUNCTUATION PASEQ + 0x05c1: 0x00d1, # HEBREW POINT SHIN DOT + 0x05c2: 0x00d2, # HEBREW POINT SIN DOT + 0x05c3: 0x00d3, # HEBREW PUNCTUATION SOF PASUQ + 0x05d0: 0x00e0, # HEBREW LETTER ALEF + 0x05d1: 0x00e1, # HEBREW LETTER BET + 0x05d2: 0x00e2, # HEBREW LETTER GIMEL + 0x05d3: 0x00e3, # HEBREW LETTER DALET + 0x05d4: 0x00e4, # HEBREW LETTER HE + 0x05d5: 0x00e5, # HEBREW LETTER VAV + 0x05d6: 0x00e6, # HEBREW LETTER ZAYIN + 0x05d7: 0x00e7, # HEBREW LETTER HET + 0x05d8: 0x00e8, # HEBREW LETTER TET + 0x05d9: 0x00e9, # HEBREW LETTER YOD + 0x05da: 0x00ea, # HEBREW LETTER FINAL KAF + 0x05db: 0x00eb, # HEBREW LETTER KAF + 0x05dc: 0x00ec, # HEBREW LETTER LAMED + 0x05dd: 0x00ed, # HEBREW LETTER FINAL MEM + 0x05de: 0x00ee, # HEBREW LETTER MEM + 0x05df: 0x00ef, # HEBREW LETTER FINAL NUN + 0x05e0: 0x00f0, # 
HEBREW LETTER NUN + 0x05e1: 0x00f1, # HEBREW LETTER SAMEKH + 0x05e2: 0x00f2, # HEBREW LETTER AYIN + 0x05e3: 0x00f3, # HEBREW LETTER FINAL PE + 0x05e4: 0x00f4, # HEBREW LETTER PE + 0x05e5: 0x00f5, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x00f6, # HEBREW LETTER TSADI + 0x05e7: 0x00f7, # HEBREW LETTER QOF + 0x05e8: 0x00f8, # HEBREW LETTER RESH + 0x05e9: 0x00f9, # HEBREW LETTER SHIN + 0x05ea: 0x00fa, # HEBREW LETTER TAV + 0x05f0: 0x00d4, # HEBREW LIGATURE YIDDISH DOUBLE VAV + 0x05f1: 0x00d5, # HEBREW LIGATURE YIDDISH VAV YOD + 0x05f2: 0x00d6, # HEBREW LIGATURE YIDDISH DOUBLE YOD + 0x05f3: 0x00d7, # HEBREW PUNCTUATION GERESH + 0x05f4: 0x00d8, # HEBREW PUNCTUATION GERSHAYIM + 0x200e: 0x00fd, # LEFT-TO-RIGHT MARK + 0x200f: 0x00fe, # RIGHT-TO-LEFT MARK + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20aa: 0x00a4, # NEW SHEQEL SIGN + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1256.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1256.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1256.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1256.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). 
- -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,93 +32,611 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: 0x067e, # ARABIC LETTER PEH - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0679, # ARABIC LETTER TTEH - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: 0x0686, # ARABIC LETTER TCHEH - 0x008e: 0x0698, # ARABIC LETTER JEH - 0x008f: 0x0688, # ARABIC LETTER DDAL - 0x0090: 0x06af, # ARABIC LETTER GAF - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x06a9, # ARABIC LETTER KEHEH - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0691, # ARABIC LETTER RREH - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: 0x200c, # ZERO WIDTH NON-JOINER - 0x009e: 0x200d, # ZERO WIDTH JOINER - 0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA - 0x00a1: 0x060c, # ARABIC COMMA - 0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE - 0x00ba: 0x061b, # ARABIC SEMICOLON - 0x00bf: 0x061f, 
# ARABIC QUESTION MARK - 0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d8: 0x0637, # ARABIC LETTER TAH - 0x00d9: 0x0638, # ARABIC LETTER ZAH - 0x00da: 0x0639, # ARABIC LETTER AIN - 0x00db: 0x063a, # ARABIC LETTER GHAIN - 0x00dc: 0x0640, # ARABIC TATWEEL - 0x00dd: 0x0641, # ARABIC LETTER FEH - 0x00de: 0x0642, # ARABIC LETTER QAF - 0x00df: 0x0643, # ARABIC LETTER KAF - 0x00e1: 0x0644, # ARABIC LETTER LAM - 0x00e3: 0x0645, # ARABIC LETTER MEEM - 0x00e4: 0x0646, # ARABIC LETTER NOON - 0x00e5: 0x0647, # ARABIC LETTER HEH - 0x00e6: 0x0648, # ARABIC LETTER WAW - 0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ed: 0x064a, # ARABIC LETTER YEH - 0x00f0: 0x064b, # ARABIC FATHATAN - 0x00f1: 0x064c, # ARABIC DAMMATAN - 0x00f2: 0x064d, # ARABIC KASRATAN - 0x00f3: 0x064e, # ARABIC FATHA - 0x00f5: 0x064f, # ARABIC DAMMA - 0x00f6: 0x0650, # ARABIC KASRA - 0x00f8: 0x0651, # ARABIC SHADDA - 0x00fa: 0x0652, # ARABIC SUKUN - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE + 0x0080: 
0x20ac, # EURO SIGN + 0x0081: 0x067e, # ARABIC LETTER PEH + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0679, # ARABIC LETTER TTEH + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: 0x0686, # ARABIC LETTER TCHEH + 0x008e: 0x0698, # ARABIC LETTER JEH + 0x008f: 0x0688, # ARABIC LETTER DDAL + 0x0090: 0x06af, # ARABIC LETTER GAF + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x06a9, # ARABIC LETTER KEHEH + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0691, # ARABIC LETTER RREH + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: 0x200c, # ZERO WIDTH NON-JOINER + 0x009e: 0x200d, # ZERO WIDTH JOINER + 0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA + 0x00a1: 0x060c, # ARABIC COMMA + 0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE + 0x00ba: 0x061b, # ARABIC SEMICOLON + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER 
TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d8: 0x0637, # ARABIC LETTER TAH + 0x00d9: 0x0638, # ARABIC LETTER ZAH + 0x00da: 0x0639, # ARABIC LETTER AIN + 0x00db: 0x063a, # ARABIC LETTER GHAIN + 0x00dc: 0x0640, # ARABIC TATWEEL + 0x00dd: 0x0641, # ARABIC LETTER FEH + 0x00de: 0x0642, # ARABIC LETTER QAF + 0x00df: 0x0643, # ARABIC LETTER KAF + 0x00e1: 0x0644, # ARABIC LETTER LAM + 0x00e3: 0x0645, # ARABIC LETTER MEEM + 0x00e4: 0x0646, # ARABIC LETTER NOON + 0x00e5: 0x0647, # ARABIC LETTER HEH + 0x00e6: 0x0648, # ARABIC LETTER WAW + 0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ed: 0x064a, # ARABIC LETTER YEH + 0x00f0: 0x064b, # ARABIC FATHATAN + 0x00f1: 0x064c, # ARABIC DAMMATAN + 0x00f2: 0x064d, # ARABIC KASRATAN + 0x00f3: 0x064e, # ARABIC FATHA + 0x00f5: 0x064f, # ARABIC DAMMA + 0x00f6: 0x0650, # ARABIC KASRA + 0x00f8: 0x0651, # ARABIC SHADDA + 0x00fa: 0x0652, # ARABIC SUKUN + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> 
SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\u067e' # 0x0081 -> ARABIC LETTER PEH + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\u0679' # 0x008a -> ARABIC LETTER TTEH + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\u0686' # 0x008d -> ARABIC LETTER TCHEH + u'\u0698' # 0x008e -> ARABIC LETTER JEH + u'\u0688' # 0x008f -> ARABIC LETTER DDAL + u'\u06af' # 0x0090 -> ARABIC LETTER GAF + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u06a9' # 0x0098 -> ARABIC LETTER KEHEH + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\u0691' # 0x009a -> ARABIC LETTER RREH + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\u200c' # 0x009d -> ZERO WIDTH NON-JOINER + u'\u200d' # 0x009e -> ZERO WIDTH JOINER + u'\u06ba' # 0x009f 
-> ARABIC LETTER NOON GHUNNA + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u060c' # 0x00a1 -> ARABIC COMMA + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u06be' # 0x00aa -> ARABIC LETTER HEH DOACHASHMEE + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\u061b' # 0x00ba -> ARABIC SEMICOLON + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\u06c1' # 0x00c0 -> ARABIC LETTER HEH GOAL + u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA + u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF + u'\u0628' # 0x00c8 -> ARABIC LETTER BEH + u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0x00ca -> ARABIC LETTER TEH + u'\u062b' # 0x00cb -> ARABIC LETTER THEH + u'\u062c' # 0x00cc -> ARABIC LETTER JEEM + u'\u062d' # 0x00cd -> ARABIC LETTER HAH + u'\u062e' # 0x00ce -> 
ARABIC LETTER KHAH + u'\u062f' # 0x00cf -> ARABIC LETTER DAL + u'\u0630' # 0x00d0 -> ARABIC LETTER THAL + u'\u0631' # 0x00d1 -> ARABIC LETTER REH + u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN + u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN + u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN + u'\u0635' # 0x00d5 -> ARABIC LETTER SAD + u'\u0636' # 0x00d6 -> ARABIC LETTER DAD + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0637' # 0x00d8 -> ARABIC LETTER TAH + u'\u0638' # 0x00d9 -> ARABIC LETTER ZAH + u'\u0639' # 0x00da -> ARABIC LETTER AIN + u'\u063a' # 0x00db -> ARABIC LETTER GHAIN + u'\u0640' # 0x00dc -> ARABIC TATWEEL + u'\u0641' # 0x00dd -> ARABIC LETTER FEH + u'\u0642' # 0x00de -> ARABIC LETTER QAF + u'\u0643' # 0x00df -> ARABIC LETTER KAF + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\u0644' # 0x00e1 -> ARABIC LETTER LAM + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0645' # 0x00e3 -> ARABIC LETTER MEEM + u'\u0646' # 0x00e4 -> ARABIC LETTER NOON + u'\u0647' # 0x00e5 -> ARABIC LETTER HEH + u'\u0648' # 0x00e6 -> ARABIC LETTER WAW + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0649' # 0x00ec -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0x00ed -> ARABIC LETTER YEH + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u064b' # 0x00f0 -> ARABIC FATHATAN + u'\u064c' # 0x00f1 -> ARABIC DAMMATAN + u'\u064d' # 0x00f2 -> ARABIC KASRATAN + u'\u064e' # 0x00f3 -> ARABIC FATHA + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u064f' # 0x00f5 -> ARABIC DAMMA + u'\u0650' # 0x00f6 -> ARABIC KASRA + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0651' # 0x00f8 -> ARABIC SHADDA + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0652' # 0x00fa -> 
ARABIC SUKUN + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK + u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK + u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO 
+ 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL 
LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 
0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x060c: 0x00a1, # ARABIC COMMA + 0x061b: 0x00ba, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0621: 0x00c1, # ARABIC LETTER HAMZA + 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0x00c7, # ARABIC LETTER ALEF + 0x0628: 0x00c8, # ARABIC LETTER BEH + 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA + 0x062a: 0x00ca, # ARABIC LETTER TEH + 0x062b: 0x00cb, # ARABIC LETTER THEH + 0x062c: 0x00cc, # ARABIC LETTER JEEM + 0x062d: 0x00cd, # ARABIC LETTER HAH + 0x062e: 0x00ce, # ARABIC LETTER KHAH + 0x062f: 0x00cf, # ARABIC LETTER DAL + 0x0630: 0x00d0, # ARABIC LETTER THAL + 0x0631: 0x00d1, # ARABIC LETTER REH + 0x0632: 0x00d2, # ARABIC LETTER ZAIN + 0x0633: 0x00d3, # 
ARABIC LETTER SEEN + 0x0634: 0x00d4, # ARABIC LETTER SHEEN + 0x0635: 0x00d5, # ARABIC LETTER SAD + 0x0636: 0x00d6, # ARABIC LETTER DAD + 0x0637: 0x00d8, # ARABIC LETTER TAH + 0x0638: 0x00d9, # ARABIC LETTER ZAH + 0x0639: 0x00da, # ARABIC LETTER AIN + 0x063a: 0x00db, # ARABIC LETTER GHAIN + 0x0640: 0x00dc, # ARABIC TATWEEL + 0x0641: 0x00dd, # ARABIC LETTER FEH + 0x0642: 0x00de, # ARABIC LETTER QAF + 0x0643: 0x00df, # ARABIC LETTER KAF + 0x0644: 0x00e1, # ARABIC LETTER LAM + 0x0645: 0x00e3, # ARABIC LETTER MEEM + 0x0646: 0x00e4, # ARABIC LETTER NOON + 0x0647: 0x00e5, # ARABIC LETTER HEH + 0x0648: 0x00e6, # ARABIC LETTER WAW + 0x0649: 0x00ec, # ARABIC LETTER ALEF MAKSURA + 0x064a: 0x00ed, # ARABIC LETTER YEH + 0x064b: 0x00f0, # ARABIC FATHATAN + 0x064c: 0x00f1, # ARABIC DAMMATAN + 0x064d: 0x00f2, # ARABIC KASRATAN + 0x064e: 0x00f3, # ARABIC FATHA + 0x064f: 0x00f5, # ARABIC DAMMA + 0x0650: 0x00f6, # ARABIC KASRA + 0x0651: 0x00f8, # ARABIC SHADDA + 0x0652: 0x00fa, # ARABIC SUKUN + 0x0679: 0x008a, # ARABIC LETTER TTEH + 0x067e: 0x0081, # ARABIC LETTER PEH + 0x0686: 0x008d, # ARABIC LETTER TCHEH + 0x0688: 0x008f, # ARABIC LETTER DDAL + 0x0691: 0x009a, # ARABIC LETTER RREH + 0x0698: 0x008e, # ARABIC LETTER JEH + 0x06a9: 0x0098, # ARABIC LETTER KEHEH + 0x06af: 0x0090, # ARABIC LETTER GAF + 0x06ba: 0x009f, # ARABIC LETTER NOON GHUNNA + 0x06be: 0x00aa, # ARABIC LETTER HEH DOACHASHMEE + 0x06c1: 0x00c0, # ARABIC LETTER HEH GOAL + 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE + 0x200c: 0x009d, # ZERO WIDTH NON-JOINER + 0x200d: 0x009e, # ZERO WIDTH JOINER + 0x200e: 0x00fd, # LEFT-TO-RIGHT MARK + 0x200f: 0x00fe, # RIGHT-TO-LEFT MARK + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 
0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1257.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1257.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1257.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1257.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,95 +32,601 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: None, # UNDEFINED - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: None, # UNDEFINED - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: 0x00a8, # DIAERESIS - 0x008e: 0x02c7, # CARON - 0x008f: 0x00b8, # CEDILLA - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: None, # UNDEFINED - 0x009d: 0x00af, # MACRON - 0x009e: 0x02db, # OGONEK - 0x009f: None, # UNDEFINED - 0x00a1: None, # UNDEFINED - 0x00a5: None, # UNDEFINED - 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00bf: 0x00e6, # LATIN SMALL LETTER AE - 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c3: 0x0106, # LATIN 
CAPITAL LETTER C WITH ACUTE - 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f8: 0x0173, # 
LATIN SMALL LETTER U WITH OGONEK - 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ff: 0x02d9, # DOT ABOVE + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: None, # UNDEFINED + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: None, # UNDEFINED + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: 0x00a8, # DIAERESIS + 0x008e: 0x02c7, # CARON + 0x008f: 0x00b8, # CEDILLA + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: None, # UNDEFINED + 0x009d: 0x00af, # MACRON + 0x009e: 0x02db, # OGONEK + 0x009f: None, # UNDEFINED + 0x00a1: None, # UNDEFINED + 0x00a5: None, # UNDEFINED + 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00bf: 0x00e6, # LATIN SMALL LETTER AE + 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c3: 0x0106, # LATIN 
CAPITAL LETTER C WITH ACUTE + 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f8: 0x0173, # 
LATIN SMALL LETTER U WITH OGONEK + 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x0083 -> UNDEFINED + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\ufffe' # 0x0088 -> UNDEFINED + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\xa8' # 0x008d -> DIAERESIS + u'\u02c7' # 0x008e -> CARON + u'\xb8' # 0x008f -> CEDILLA + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN 
DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x009c -> UNDEFINED + u'\xaf' # 0x009d -> MACRON + u'\u02db' # 0x009e -> OGONEK + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\ufffe' # 0x00a1 -> UNDEFINED + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufffe' # 0x00a5 -> UNDEFINED + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xd8' # 0x00a8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u0156' # 0x00aa -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xc6' # 0x00af -> LATIN CAPITAL LETTER AE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xf8' # 0x00b8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\u0157' # 0x00ba -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0x00bf -> LATIN SMALL LETTER AE + u'\u0104' # 0x00c0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0x00c1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0x00c2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0x00c3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN 
CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0x00c6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0x00c7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0x00ca -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0x00cb -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0x00cc -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0x00cd -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0x00ce -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0x00cf -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0x00d0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0x00d4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0172' # 0x00d8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0x00d9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0x00da -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0x00db -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0x00dd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0x00de -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0105' # 0x00e0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0x00e1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0x00e2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0x00e3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0x00e6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0x00e7 -> LATIN SMALL LETTER E WITH MACRON + 
u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0x00ea -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0x00eb -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0x00ec -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0x00ed -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0x00ee -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0x00ef -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0x00f0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0x00f2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0x00f4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0173' # 0x00f8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0x00f9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0x00fa -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0x00fb -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0x00fd -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0x00fe -> LATIN SMALL LETTER Z WITH CARON + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE 
CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 
0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY 
BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x008d, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x009d, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x008f, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00af, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00a8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00bf, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f5: 0x00f5, # LATIN SMALL LETTER 
O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00b8, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00c2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00c0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00e0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e3, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00c7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00e7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cb, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00eb, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00c6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00e6, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00cc, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00ec, # LATIN SMALL LETTER G WITH CEDILLA + 0x012a: 0x00ce, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00ee, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c1, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00cd, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00ed, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00cf, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00ef, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00d9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00f9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00d2, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00f2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00d4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f4, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x00aa, # LATIN CAPITAL LETTER R WITH 
CEDILLA + 0x0157: 0x00ba, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x00da, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00fa, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00d0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00f0, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00db, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00fb, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x00ca, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ea, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00dd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00fd, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00de, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00fe, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x008e, # CARON + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x009e, # OGONEK + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x0080, # EURO SIGN + 0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp1258.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1258.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1258.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1258.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec 
generated from 'CP1258.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,54 +32,563 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: None, # UNDEFINED - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: None, # UNDEFINED - 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00cc: 0x0300, # COMBINING GRAVE ACCENT - 0x00d0: 0x0110, # LATIN CAPITAL 
LETTER D WITH STROKE - 0x00d2: 0x0309, # COMBINING HOOK ABOVE - 0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH HORN - 0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN - 0x00de: 0x0303, # COMBINING TILDE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00ec: 0x0301, # COMBINING ACUTE ACCENT - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f2: 0x0323, # COMBINING DOT BELOW - 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN - 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN - 0x00fe: 0x20ab, # DONG SIGN + 0x0080: 0x20ac, # EURO SIGN + 0x0081: None, # UNDEFINED + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: None, # UNDEFINED + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: None, # UNDEFINED + 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00cc: 0x0300, # COMBINING GRAVE ACCENT + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x0309, # COMBINING HOOK ABOVE + 0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH 
HORN + 0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN + 0x00de: 0x0303, # COMBINING TILDE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00ec: 0x0301, # COMBINING ACUTE ACCENT + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f2: 0x0323, # COMBINING DOT BELOW + 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN + 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN + 0x00fe: 0x20ab, # DONG SIGN }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\u201a' # 0x0082 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x0083 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x0084 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x0086 -> DAGGER + u'\u2021' # 0x0087 -> DOUBLE DAGGER + u'\u02c6' # 0x0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x0089 -> PER MILLE SIGN + u'\ufffe' # 0x008a -> UNDEFINED + u'\u2039' # 0x008b -> SINGLE 
LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x008c -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\u02dc' # 0x0098 -> SMALL TILDE + u'\u2122' # 0x0099 -> TRADE MARK SIGN + u'\ufffe' # 0x009a -> UNDEFINED + u'\u203a' # 0x009b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x009c -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\u0178' # 0x009f -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + 
u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0300' # 0x00cc -> COMBINING GRAVE ACCENT + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u0309' # 0x00d2 -> COMBINING HOOK ABOVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u01a0' # 0x00d5 -> LATIN CAPITAL LETTER O WITH HORN + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u01af' # 0x00dd -> LATIN CAPITAL LETTER U WITH HORN + u'\u0303' # 0x00de -> COMBINING TILDE + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 
0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0301' # 0x00ec -> COMBINING ACUTE ACCENT + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\u0323' # 0x00f2 -> COMBINING DOT BELOW + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u01a1' # 0x00f5 -> LATIN SMALL LETTER O WITH HORN + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u01b0' # 0x00fd -> LATIN SMALL LETTER U WITH HORN + u'\u20ab' # 0x00fe -> DONG SIGN + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 
0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 
0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 
0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN 
CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # 
LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0152: 0x008c, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x009c, # LATIN SMALL LIGATURE OE + 0x0178: 0x009f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x0083, # LATIN SMALL LETTER F WITH HOOK + 0x01a0: 0x00d5, # LATIN CAPITAL LETTER O WITH HORN + 0x01a1: 0x00f5, # LATIN SMALL LETTER O WITH HORN + 0x01af: 0x00dd, # LATIN CAPITAL LETTER U WITH HORN + 0x01b0: 0x00fd, # LATIN SMALL LETTER U WITH HORN + 0x02c6: 0x0088, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02dc: 0x0098, # SMALL TILDE + 0x0300: 0x00cc, # COMBINING GRAVE ACCENT + 0x0301: 0x00ec, # COMBINING ACUTE ACCENT + 0x0303: 0x00de, # COMBINING TILDE + 0x0309: 0x00d2, # COMBINING HOOK ABOVE + 0x0323: 0x00f2, # COMBINING DOT BELOW + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x0082, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x0084, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x0086, # DAGGER + 0x2021: 0x0087, # DOUBLE DAGGER + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x2030: 0x0089, # PER MILLE SIGN + 0x2039: 0x008b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x009b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ab: 0x00fe, # DONG SIGN + 0x20ac: 0x0080, # EURO SIGN + 
0x2122: 0x0099, # TRADE MARK SIGN +} \ No newline at end of file Index: cp424.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp424.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp424.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp424.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP424.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,244 +32,724 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0004: 0x009c, # SELECT - 0x0005: 0x0009, # HORIZONTAL TABULATION - 0x0006: 0x0086, # REQUIRED NEW LINE - 0x0007: 0x007f, # DELETE - 0x0008: 0x0097, # GRAPHIC ESCAPE - 0x0009: 0x008d, # SUPERSCRIPT - 0x000a: 0x008e, # REPEAT - 0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION - 0x0015: 0x0085, # NEW LINE - 0x0016: 0x0008, # BACKSPACE - 0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION - 0x001a: 0x0092, # UNIT BACK SPACE - 0x001b: 0x008f, # CUSTOMER USE ONE - 0x0020: 0x0080, # DIGIT SELECT - 0x0021: 0x0081, # START OF SIGNIFICANCE - 0x0022: 0x0082, # FIELD SEPARATOR - 0x0023: 0x0083, # WORD UNDERSCORE - 0x0024: 0x0084, # BYPASS OR INHIBIT PRESENTATION - 0x0025: 0x000a, # LINE FEED - 0x0026: 0x0017, # END OF TRANSMISSION BLOCK - 0x0027: 0x001b, # ESCAPE - 0x0028: 0x0088, # SET ATTRIBUTE - 0x0029: 0x0089, # START FIELD EXTENDED - 0x002a: 0x008a, # SET MODE OR SWITCH - 0x002b: 0x008b, # CONTROL SEQUENCE PREFIX - 0x002c: 0x008c, # MODIFY 
FIELD ATTRIBUTE - 0x002d: 0x0005, # ENQUIRY - 0x002e: 0x0006, # ACKNOWLEDGE - 0x002f: 0x0007, # BELL - 0x0030: 0x0090, # - 0x0031: 0x0091, # - 0x0032: 0x0016, # SYNCHRONOUS IDLE - 0x0033: 0x0093, # INDEX RETURN - 0x0034: 0x0094, # PRESENTATION POSITION - 0x0035: 0x0095, # TRANSPARENT - 0x0036: 0x0096, # NUMERIC BACKSPACE - 0x0037: 0x0004, # END OF TRANSMISSION - 0x0038: 0x0098, # SUBSCRIPT - 0x0039: 0x0099, # INDENT TABULATION - 0x003a: 0x009a, # REVERSE FORM FEED - 0x003b: 0x009b, # CUSTOMER USE THREE - 0x003c: 0x0014, # DEVICE CONTROL FOUR - 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x003e: 0x009e, # - 0x003f: 0x001a, # SUBSTITUTE - 0x0040: 0x0020, # SPACE - 0x0041: 0x05d0, # HEBREW LETTER ALEF - 0x0042: 0x05d1, # HEBREW LETTER BET - 0x0043: 0x05d2, # HEBREW LETTER GIMEL - 0x0044: 0x05d3, # HEBREW LETTER DALET - 0x0045: 0x05d4, # HEBREW LETTER HE - 0x0046: 0x05d5, # HEBREW LETTER VAV - 0x0047: 0x05d6, # HEBREW LETTER ZAYIN - 0x0048: 0x05d7, # HEBREW LETTER HET - 0x0049: 0x05d8, # HEBREW LETTER TET - 0x004a: 0x00a2, # CENT SIGN - 0x004b: 0x002e, # FULL STOP - 0x004c: 0x003c, # LESS-THAN SIGN - 0x004d: 0x0028, # LEFT PARENTHESIS - 0x004e: 0x002b, # PLUS SIGN - 0x004f: 0x007c, # VERTICAL LINE - 0x0050: 0x0026, # AMPERSAND - 0x0051: 0x05d9, # HEBREW LETTER YOD - 0x0052: 0x05da, # HEBREW LETTER FINAL KAF - 0x0053: 0x05db, # HEBREW LETTER KAF - 0x0054: 0x05dc, # HEBREW LETTER LAMED - 0x0055: 0x05dd, # HEBREW LETTER FINAL MEM - 0x0056: 0x05de, # HEBREW LETTER MEM - 0x0057: 0x05df, # HEBREW LETTER FINAL NUN - 0x0058: 0x05e0, # HEBREW LETTER NUN - 0x0059: 0x05e1, # HEBREW LETTER SAMEKH - 0x005a: 0x0021, # EXCLAMATION MARK - 0x005b: 0x0024, # DOLLAR SIGN - 0x005c: 0x002a, # ASTERISK - 0x005d: 0x0029, # RIGHT PARENTHESIS - 0x005e: 0x003b, # SEMICOLON - 0x005f: 0x00ac, # NOT SIGN - 0x0060: 0x002d, # HYPHEN-MINUS - 0x0061: 0x002f, # SOLIDUS - 0x0062: 0x05e2, # HEBREW LETTER AYIN - 0x0063: 0x05e3, # HEBREW LETTER FINAL PE - 0x0064: 0x05e4, # HEBREW LETTER PE - 0x0065: 0x05e5, # 
HEBREW LETTER FINAL TSADI - 0x0066: 0x05e6, # HEBREW LETTER TSADI - 0x0067: 0x05e7, # HEBREW LETTER QOF - 0x0068: 0x05e8, # HEBREW LETTER RESH - 0x0069: 0x05e9, # HEBREW LETTER SHIN - 0x006a: 0x00a6, # BROKEN BAR - 0x006b: 0x002c, # COMMA - 0x006c: 0x0025, # PERCENT SIGN - 0x006d: 0x005f, # LOW LINE - 0x006e: 0x003e, # GREATER-THAN SIGN - 0x006f: 0x003f, # QUESTION MARK - 0x0070: None, # UNDEFINED - 0x0071: 0x05ea, # HEBREW LETTER TAV - 0x0072: None, # UNDEFINED - 0x0073: None, # UNDEFINED - 0x0074: 0x00a0, # NO-BREAK SPACE - 0x0075: None, # UNDEFINED - 0x0076: None, # UNDEFINED - 0x0077: None, # UNDEFINED - 0x0078: 0x2017, # DOUBLE LOW LINE - 0x0079: 0x0060, # GRAVE ACCENT - 0x007a: 0x003a, # COLON - 0x007b: 0x0023, # NUMBER SIGN - 0x007c: 0x0040, # COMMERCIAL AT - 0x007d: 0x0027, # APOSTROPHE - 0x007e: 0x003d, # EQUALS SIGN - 0x007f: 0x0022, # QUOTATION MARK - 0x0080: None, # UNDEFINED - 0x0081: 0x0061, # LATIN SMALL LETTER A - 0x0082: 0x0062, # LATIN SMALL LETTER B - 0x0083: 0x0063, # LATIN SMALL LETTER C - 0x0084: 0x0064, # LATIN SMALL LETTER D - 0x0085: 0x0065, # LATIN SMALL LETTER E - 0x0086: 0x0066, # LATIN SMALL LETTER F - 0x0087: 0x0067, # LATIN SMALL LETTER G - 0x0088: 0x0068, # LATIN SMALL LETTER H - 0x0089: 0x0069, # LATIN SMALL LETTER I - 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: 0x00b1, # PLUS-MINUS SIGN - 0x0090: 0x00b0, # DEGREE SIGN - 0x0091: 0x006a, # LATIN SMALL LETTER J - 0x0092: 0x006b, # LATIN SMALL LETTER K - 0x0093: 0x006c, # LATIN SMALL LETTER L - 0x0094: 0x006d, # LATIN SMALL LETTER M - 0x0095: 0x006e, # LATIN SMALL LETTER N - 0x0096: 0x006f, # LATIN SMALL LETTER O - 0x0097: 0x0070, # LATIN SMALL LETTER P - 0x0098: 0x0071, # LATIN SMALL LETTER Q - 0x0099: 0x0072, # LATIN SMALL LETTER R - 0x009a: None, # UNDEFINED - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED 
- 0x009d: 0x00b8, # CEDILLA - 0x009e: None, # UNDEFINED - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x00b5, # MICRO SIGN - 0x00a1: 0x007e, # TILDE - 0x00a2: 0x0073, # LATIN SMALL LETTER S - 0x00a3: 0x0074, # LATIN SMALL LETTER T - 0x00a4: 0x0075, # LATIN SMALL LETTER U - 0x00a5: 0x0076, # LATIN SMALL LETTER V - 0x00a6: 0x0077, # LATIN SMALL LETTER W - 0x00a7: 0x0078, # LATIN SMALL LETTER X - 0x00a8: 0x0079, # LATIN SMALL LETTER Y - 0x00a9: 0x007a, # LATIN SMALL LETTER Z - 0x00aa: None, # UNDEFINED - 0x00ab: None, # UNDEFINED - 0x00ac: None, # UNDEFINED - 0x00ad: None, # UNDEFINED - 0x00ae: None, # UNDEFINED - 0x00af: 0x00ae, # REGISTERED SIGN - 0x00b0: 0x005e, # CIRCUMFLEX ACCENT - 0x00b1: 0x00a3, # POUND SIGN - 0x00b2: 0x00a5, # YEN SIGN - 0x00b3: 0x00b7, # MIDDLE DOT - 0x00b4: 0x00a9, # COPYRIGHT SIGN - 0x00b5: 0x00a7, # SECTION SIGN - 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ba: 0x005b, # LEFT SQUARE BRACKET - 0x00bb: 0x005d, # RIGHT SQUARE BRACKET - 0x00bc: 0x00af, # MACRON - 0x00bd: 0x00a8, # DIAERESIS - 0x00be: 0x00b4, # ACUTE ACCENT - 0x00bf: 0x00d7, # MULTIPLICATION SIGN - 0x00c0: 0x007b, # LEFT CURLY BRACKET - 0x00c1: 0x0041, # LATIN CAPITAL LETTER A - 0x00c2: 0x0042, # LATIN CAPITAL LETTER B - 0x00c3: 0x0043, # LATIN CAPITAL LETTER C - 0x00c4: 0x0044, # LATIN CAPITAL LETTER D - 0x00c5: 0x0045, # LATIN CAPITAL LETTER E - 0x00c6: 0x0046, # LATIN CAPITAL LETTER F - 0x00c7: 0x0047, # LATIN CAPITAL LETTER G - 0x00c8: 0x0048, # LATIN CAPITAL LETTER H - 0x00c9: 0x0049, # LATIN CAPITAL LETTER I - 0x00ca: 0x00ad, # SOFT HYPHEN - 0x00cb: None, # UNDEFINED - 0x00cc: None, # UNDEFINED - 0x00cd: None, # UNDEFINED - 0x00ce: None, # UNDEFINED - 0x00cf: None, # UNDEFINED - 0x00d0: 0x007d, # RIGHT CURLY BRACKET - 0x00d1: 0x004a, # LATIN CAPITAL LETTER J - 0x00d2: 0x004b, # LATIN CAPITAL LETTER K - 0x00d3: 0x004c, # LATIN CAPITAL LETTER L - 0x00d4: 0x004d, # LATIN 
CAPITAL LETTER M - 0x00d5: 0x004e, # LATIN CAPITAL LETTER N - 0x00d6: 0x004f, # LATIN CAPITAL LETTER O - 0x00d7: 0x0050, # LATIN CAPITAL LETTER P - 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q - 0x00d9: 0x0052, # LATIN CAPITAL LETTER R - 0x00da: 0x00b9, # SUPERSCRIPT ONE - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: None, # UNDEFINED - 0x00e0: 0x005c, # REVERSE SOLIDUS - 0x00e1: 0x00f7, # DIVISION SIGN - 0x00e2: 0x0053, # LATIN CAPITAL LETTER S - 0x00e3: 0x0054, # LATIN CAPITAL LETTER T - 0x00e4: 0x0055, # LATIN CAPITAL LETTER U - 0x00e5: 0x0056, # LATIN CAPITAL LETTER V - 0x00e6: 0x0057, # LATIN CAPITAL LETTER W - 0x00e7: 0x0058, # LATIN CAPITAL LETTER X - 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y - 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z - 0x00ea: 0x00b2, # SUPERSCRIPT TWO - 0x00eb: None, # UNDEFINED - 0x00ec: None, # UNDEFINED - 0x00ed: None, # UNDEFINED - 0x00ee: None, # UNDEFINED - 0x00ef: None, # UNDEFINED - 0x00f0: 0x0030, # DIGIT ZERO - 0x00f1: 0x0031, # DIGIT ONE - 0x00f2: 0x0032, # DIGIT TWO - 0x00f3: 0x0033, # DIGIT THREE - 0x00f4: 0x0034, # DIGIT FOUR - 0x00f5: 0x0035, # DIGIT FIVE - 0x00f6: 0x0036, # DIGIT SIX - 0x00f7: 0x0037, # DIGIT SEVEN - 0x00f8: 0x0038, # DIGIT EIGHT - 0x00f9: 0x0039, # DIGIT NINE - 0x00fa: 0x00b3, # SUPERSCRIPT THREE - 0x00fb: None, # UNDEFINED - 0x00fc: None, # UNDEFINED - 0x00fd: None, # UNDEFINED - 0x00fe: None, # UNDEFINED - 0x00ff: 0x009f, # EIGHT ONES + 0x0004: 0x009c, # SELECT + 0x0005: 0x0009, # HORIZONTAL TABULATION + 0x0006: 0x0086, # REQUIRED NEW LINE + 0x0007: 0x007f, # DELETE + 0x0008: 0x0097, # GRAPHIC ESCAPE + 0x0009: 0x008d, # SUPERSCRIPT + 0x000a: 0x008e, # REPEAT + 0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION + 0x0015: 0x0085, # NEW LINE + 0x0016: 0x0008, # BACKSPACE + 0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION + 0x001a: 0x0092, # UNIT BACK SPACE + 0x001b: 0x008f, # CUSTOMER USE ONE + 0x0020: 0x0080, # DIGIT SELECT + 0x0021: 0x0081, 
# START OF SIGNIFICANCE + 0x0022: 0x0082, # FIELD SEPARATOR + 0x0023: 0x0083, # WORD UNDERSCORE + 0x0024: 0x0084, # BYPASS OR INHIBIT PRESENTATION + 0x0025: 0x000a, # LINE FEED + 0x0026: 0x0017, # END OF TRANSMISSION BLOCK + 0x0027: 0x001b, # ESCAPE + 0x0028: 0x0088, # SET ATTRIBUTE + 0x0029: 0x0089, # START FIELD EXTENDED + 0x002a: 0x008a, # SET MODE OR SWITCH + 0x002b: 0x008b, # CONTROL SEQUENCE PREFIX + 0x002c: 0x008c, # MODIFY FIELD ATTRIBUTE + 0x002d: 0x0005, # ENQUIRY + 0x002e: 0x0006, # ACKNOWLEDGE + 0x002f: 0x0007, # BELL + 0x0030: 0x0090, # + 0x0031: 0x0091, # + 0x0032: 0x0016, # SYNCHRONOUS IDLE + 0x0033: 0x0093, # INDEX RETURN + 0x0034: 0x0094, # PRESENTATION POSITION + 0x0035: 0x0095, # TRANSPARENT + 0x0036: 0x0096, # NUMERIC BACKSPACE + 0x0037: 0x0004, # END OF TRANSMISSION + 0x0038: 0x0098, # SUBSCRIPT + 0x0039: 0x0099, # INDENT TABULATION + 0x003a: 0x009a, # REVERSE FORM FEED + 0x003b: 0x009b, # CUSTOMER USE THREE + 0x003c: 0x0014, # DEVICE CONTROL FOUR + 0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x003e: 0x009e, # + 0x003f: 0x001a, # SUBSTITUTE + 0x0040: 0x0020, # SPACE + 0x0041: 0x05d0, # HEBREW LETTER ALEF + 0x0042: 0x05d1, # HEBREW LETTER BET + 0x0043: 0x05d2, # HEBREW LETTER GIMEL + 0x0044: 0x05d3, # HEBREW LETTER DALET + 0x0045: 0x05d4, # HEBREW LETTER HE + 0x0046: 0x05d5, # HEBREW LETTER VAV + 0x0047: 0x05d6, # HEBREW LETTER ZAYIN + 0x0048: 0x05d7, # HEBREW LETTER HET + 0x0049: 0x05d8, # HEBREW LETTER TET + 0x004a: 0x00a2, # CENT SIGN + 0x004b: 0x002e, # FULL STOP + 0x004c: 0x003c, # LESS-THAN SIGN + 0x004d: 0x0028, # LEFT PARENTHESIS + 0x004e: 0x002b, # PLUS SIGN + 0x004f: 0x007c, # VERTICAL LINE + 0x0050: 0x0026, # AMPERSAND + 0x0051: 0x05d9, # HEBREW LETTER YOD + 0x0052: 0x05da, # HEBREW LETTER FINAL KAF + 0x0053: 0x05db, # HEBREW LETTER KAF + 0x0054: 0x05dc, # HEBREW LETTER LAMED + 0x0055: 0x05dd, # HEBREW LETTER FINAL MEM + 0x0056: 0x05de, # HEBREW LETTER MEM + 0x0057: 0x05df, # HEBREW LETTER FINAL NUN + 0x0058: 0x05e0, # HEBREW LETTER NUN 
+ 0x0059: 0x05e1, # HEBREW LETTER SAMEKH + 0x005a: 0x0021, # EXCLAMATION MARK + 0x005b: 0x0024, # DOLLAR SIGN + 0x005c: 0x002a, # ASTERISK + 0x005d: 0x0029, # RIGHT PARENTHESIS + 0x005e: 0x003b, # SEMICOLON + 0x005f: 0x00ac, # NOT SIGN + 0x0060: 0x002d, # HYPHEN-MINUS + 0x0061: 0x002f, # SOLIDUS + 0x0062: 0x05e2, # HEBREW LETTER AYIN + 0x0063: 0x05e3, # HEBREW LETTER FINAL PE + 0x0064: 0x05e4, # HEBREW LETTER PE + 0x0065: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0066: 0x05e6, # HEBREW LETTER TSADI + 0x0067: 0x05e7, # HEBREW LETTER QOF + 0x0068: 0x05e8, # HEBREW LETTER RESH + 0x0069: 0x05e9, # HEBREW LETTER SHIN + 0x006a: 0x00a6, # BROKEN BAR + 0x006b: 0x002c, # COMMA + 0x006c: 0x0025, # PERCENT SIGN + 0x006d: 0x005f, # LOW LINE + 0x006e: 0x003e, # GREATER-THAN SIGN + 0x006f: 0x003f, # QUESTION MARK + 0x0070: None, # UNDEFINED + 0x0071: 0x05ea, # HEBREW LETTER TAV + 0x0072: None, # UNDEFINED + 0x0073: None, # UNDEFINED + 0x0074: 0x00a0, # NO-BREAK SPACE + 0x0075: None, # UNDEFINED + 0x0076: None, # UNDEFINED + 0x0077: None, # UNDEFINED + 0x0078: 0x2017, # DOUBLE LOW LINE + 0x0079: 0x0060, # GRAVE ACCENT + 0x007a: 0x003a, # COLON + 0x007b: 0x0023, # NUMBER SIGN + 0x007c: 0x0040, # COMMERCIAL AT + 0x007d: 0x0027, # APOSTROPHE + 0x007e: 0x003d, # EQUALS SIGN + 0x007f: 0x0022, # QUOTATION MARK + 0x0080: None, # UNDEFINED + 0x0081: 0x0061, # LATIN SMALL LETTER A + 0x0082: 0x0062, # LATIN SMALL LETTER B + 0x0083: 0x0063, # LATIN SMALL LETTER C + 0x0084: 0x0064, # LATIN SMALL LETTER D + 0x0085: 0x0065, # LATIN SMALL LETTER E + 0x0086: 0x0066, # LATIN SMALL LETTER F + 0x0087: 0x0067, # LATIN SMALL LETTER G + 0x0088: 0x0068, # LATIN SMALL LETTER H + 0x0089: 0x0069, # LATIN SMALL LETTER I + 0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: 0x00b1, # PLUS-MINUS SIGN + 0x0090: 0x00b0, # DEGREE SIGN + 0x0091: 
0x006a, # LATIN SMALL LETTER J + 0x0092: 0x006b, # LATIN SMALL LETTER K + 0x0093: 0x006c, # LATIN SMALL LETTER L + 0x0094: 0x006d, # LATIN SMALL LETTER M + 0x0095: 0x006e, # LATIN SMALL LETTER N + 0x0096: 0x006f, # LATIN SMALL LETTER O + 0x0097: 0x0070, # LATIN SMALL LETTER P + 0x0098: 0x0071, # LATIN SMALL LETTER Q + 0x0099: 0x0072, # LATIN SMALL LETTER R + 0x009a: None, # UNDEFINED + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0x00b8, # CEDILLA + 0x009e: None, # UNDEFINED + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x00b5, # MICRO SIGN + 0x00a1: 0x007e, # TILDE + 0x00a2: 0x0073, # LATIN SMALL LETTER S + 0x00a3: 0x0074, # LATIN SMALL LETTER T + 0x00a4: 0x0075, # LATIN SMALL LETTER U + 0x00a5: 0x0076, # LATIN SMALL LETTER V + 0x00a6: 0x0077, # LATIN SMALL LETTER W + 0x00a7: 0x0078, # LATIN SMALL LETTER X + 0x00a8: 0x0079, # LATIN SMALL LETTER Y + 0x00a9: 0x007a, # LATIN SMALL LETTER Z + 0x00aa: None, # UNDEFINED + 0x00ab: None, # UNDEFINED + 0x00ac: None, # UNDEFINED + 0x00ad: None, # UNDEFINED + 0x00ae: None, # UNDEFINED + 0x00af: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x005e, # CIRCUMFLEX ACCENT + 0x00b1: 0x00a3, # POUND SIGN + 0x00b2: 0x00a5, # YEN SIGN + 0x00b3: 0x00b7, # MIDDLE DOT + 0x00b4: 0x00a9, # COPYRIGHT SIGN + 0x00b5: 0x00a7, # SECTION SIGN + 0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ba: 0x005b, # LEFT SQUARE BRACKET + 0x00bb: 0x005d, # RIGHT SQUARE BRACKET + 0x00bc: 0x00af, # MACRON + 0x00bd: 0x00a8, # DIAERESIS + 0x00be: 0x00b4, # ACUTE ACCENT + 0x00bf: 0x00d7, # MULTIPLICATION SIGN + 0x00c0: 0x007b, # LEFT CURLY BRACKET + 0x00c1: 0x0041, # LATIN CAPITAL LETTER A + 0x00c2: 0x0042, # LATIN CAPITAL LETTER B + 0x00c3: 0x0043, # LATIN CAPITAL LETTER C + 0x00c4: 0x0044, # LATIN CAPITAL LETTER D + 0x00c5: 0x0045, # LATIN CAPITAL LETTER E + 0x00c6: 0x0046, # LATIN CAPITAL LETTER F + 0x00c7: 0x0047, # LATIN CAPITAL LETTER G + 0x00c8: 
0x0048, # LATIN CAPITAL LETTER H + 0x00c9: 0x0049, # LATIN CAPITAL LETTER I + 0x00ca: 0x00ad, # SOFT HYPHEN + 0x00cb: None, # UNDEFINED + 0x00cc: None, # UNDEFINED + 0x00cd: None, # UNDEFINED + 0x00ce: None, # UNDEFINED + 0x00cf: None, # UNDEFINED + 0x00d0: 0x007d, # RIGHT CURLY BRACKET + 0x00d1: 0x004a, # LATIN CAPITAL LETTER J + 0x00d2: 0x004b, # LATIN CAPITAL LETTER K + 0x00d3: 0x004c, # LATIN CAPITAL LETTER L + 0x00d4: 0x004d, # LATIN CAPITAL LETTER M + 0x00d5: 0x004e, # LATIN CAPITAL LETTER N + 0x00d6: 0x004f, # LATIN CAPITAL LETTER O + 0x00d7: 0x0050, # LATIN CAPITAL LETTER P + 0x00d8: 0x0051, # LATIN CAPITAL LETTER Q + 0x00d9: 0x0052, # LATIN CAPITAL LETTER R + 0x00da: 0x00b9, # SUPERSCRIPT ONE + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: None, # UNDEFINED + 0x00e0: 0x005c, # REVERSE SOLIDUS + 0x00e1: 0x00f7, # DIVISION SIGN + 0x00e2: 0x0053, # LATIN CAPITAL LETTER S + 0x00e3: 0x0054, # LATIN CAPITAL LETTER T + 0x00e4: 0x0055, # LATIN CAPITAL LETTER U + 0x00e5: 0x0056, # LATIN CAPITAL LETTER V + 0x00e6: 0x0057, # LATIN CAPITAL LETTER W + 0x00e7: 0x0058, # LATIN CAPITAL LETTER X + 0x00e8: 0x0059, # LATIN CAPITAL LETTER Y + 0x00e9: 0x005a, # LATIN CAPITAL LETTER Z + 0x00ea: 0x00b2, # SUPERSCRIPT TWO + 0x00eb: None, # UNDEFINED + 0x00ec: None, # UNDEFINED + 0x00ed: None, # UNDEFINED + 0x00ee: None, # UNDEFINED + 0x00ef: None, # UNDEFINED + 0x00f0: 0x0030, # DIGIT ZERO + 0x00f1: 0x0031, # DIGIT ONE + 0x00f2: 0x0032, # DIGIT TWO + 0x00f3: 0x0033, # DIGIT THREE + 0x00f4: 0x0034, # DIGIT FOUR + 0x00f5: 0x0035, # DIGIT FIVE + 0x00f6: 0x0036, # DIGIT SIX + 0x00f7: 0x0037, # DIGIT SEVEN + 0x00f8: 0x0038, # DIGIT EIGHT + 0x00f9: 0x0039, # DIGIT NINE + 0x00fa: 0x00b3, # SUPERSCRIPT THREE + 0x00fb: None, # UNDEFINED + 0x00fc: None, # UNDEFINED + 0x00fd: None, # UNDEFINED + 0x00fe: None, # UNDEFINED + 0x00ff: 0x009f, # EIGHT ONES }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> 
NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x9c' # 0x0004 -> SELECT + u'\t' # 0x0005 -> HORIZONTAL TABULATION + u'\x86' # 0x0006 -> REQUIRED NEW LINE + u'\x7f' # 0x0007 -> DELETE + u'\x97' # 0x0008 -> GRAPHIC ESCAPE + u'\x8d' # 0x0009 -> SUPERSCRIPT + u'\x8e' # 0x000a -> REPEAT + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x9d' # 0x0014 -> RESTORE/ENABLE PRESENTATION + u'\x85' # 0x0015 -> NEW LINE + u'\x08' # 0x0016 -> BACKSPACE + u'\x87' # 0x0017 -> PROGRAM OPERATOR COMMUNICATION + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x92' # 0x001a -> UNIT BACK SPACE + u'\x8f' # 0x001b -> CUSTOMER USE ONE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u'\x80' # 0x0020 -> DIGIT SELECT + u'\x81' # 0x0021 -> START OF SIGNIFICANCE + u'\x82' # 0x0022 -> FIELD SEPARATOR + u'\x83' # 0x0023 -> WORD UNDERSCORE + u'\x84' # 0x0024 -> BYPASS OR INHIBIT PRESENTATION + u'\n' # 0x0025 -> LINE FEED + u'\x17' # 0x0026 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x0027 -> ESCAPE + u'\x88' # 0x0028 -> SET ATTRIBUTE + u'\x89' # 0x0029 -> START FIELD EXTENDED + u'\x8a' # 0x002a -> SET MODE OR SWITCH + u'\x8b' # 0x002b -> CONTROL SEQUENCE PREFIX + u'\x8c' # 0x002c -> MODIFY FIELD ATTRIBUTE + u'\x05' # 0x002d -> ENQUIRY + u'\x06' # 0x002e -> ACKNOWLEDGE + u'\x07' # 0x002f -> BELL + u'\x90' # 0x0030 -> + u'\x91' # 0x0031 -> + u'\x16' # 0x0032 -> SYNCHRONOUS IDLE + u'\x93' # 0x0033 -> INDEX RETURN + u'\x94' # 0x0034 -> PRESENTATION POSITION + u'\x95' # 0x0035 -> TRANSPARENT + u'\x96' # 0x0036 -> NUMERIC 
BACKSPACE + u'\x04' # 0x0037 -> END OF TRANSMISSION + u'\x98' # 0x0038 -> SUBSCRIPT + u'\x99' # 0x0039 -> INDENT TABULATION + u'\x9a' # 0x003a -> REVERSE FORM FEED + u'\x9b' # 0x003b -> CUSTOMER USE THREE + u'\x14' # 0x003c -> DEVICE CONTROL FOUR + u'\x15' # 0x003d -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x003e -> + u'\x1a' # 0x003f -> SUBSTITUTE + u' ' # 0x0040 -> SPACE + u'\u05d0' # 0x0041 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0042 -> HEBREW LETTER BET + u'\u05d2' # 0x0043 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0044 -> HEBREW LETTER DALET + u'\u05d4' # 0x0045 -> HEBREW LETTER HE + u'\u05d5' # 0x0046 -> HEBREW LETTER VAV + u'\u05d6' # 0x0047 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0048 -> HEBREW LETTER HET + u'\u05d8' # 0x0049 -> HEBREW LETTER TET + u'\xa2' # 0x004a -> CENT SIGN + u'.' # 0x004b -> FULL STOP + u'<' # 0x004c -> LESS-THAN SIGN + u'(' # 0x004d -> LEFT PARENTHESIS + u'+' # 0x004e -> PLUS SIGN + u'|' # 0x004f -> VERTICAL LINE + u'&' # 0x0050 -> AMPERSAND + u'\u05d9' # 0x0051 -> HEBREW LETTER YOD + u'\u05da' # 0x0052 -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x0053 -> HEBREW LETTER KAF + u'\u05dc' # 0x0054 -> HEBREW LETTER LAMED + u'\u05dd' # 0x0055 -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x0056 -> HEBREW LETTER MEM + u'\u05df' # 0x0057 -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0058 -> HEBREW LETTER NUN + u'\u05e1' # 0x0059 -> HEBREW LETTER SAMEKH + u'!' 
# 0x005a -> EXCLAMATION MARK + u'$' # 0x005b -> DOLLAR SIGN + u'*' # 0x005c -> ASTERISK + u')' # 0x005d -> RIGHT PARENTHESIS + u';' # 0x005e -> SEMICOLON + u'\xac' # 0x005f -> NOT SIGN + u'-' # 0x0060 -> HYPHEN-MINUS + u'/' # 0x0061 -> SOLIDUS + u'\u05e2' # 0x0062 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0063 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0064 -> HEBREW LETTER PE + u'\u05e5' # 0x0065 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0066 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0067 -> HEBREW LETTER QOF + u'\u05e8' # 0x0068 -> HEBREW LETTER RESH + u'\u05e9' # 0x0069 -> HEBREW LETTER SHIN + u'\xa6' # 0x006a -> BROKEN BAR + u',' # 0x006b -> COMMA + u'%' # 0x006c -> PERCENT SIGN + u'_' # 0x006d -> LOW LINE + u'>' # 0x006e -> GREATER-THAN SIGN + u'?' # 0x006f -> QUESTION MARK + u'\ufffe' # 0x0070 -> UNDEFINED + u'\u05ea' # 0x0071 -> HEBREW LETTER TAV + u'\ufffe' # 0x0072 -> UNDEFINED + u'\ufffe' # 0x0073 -> UNDEFINED + u'\xa0' # 0x0074 -> NO-BREAK SPACE + u'\ufffe' # 0x0075 -> UNDEFINED + u'\ufffe' # 0x0076 -> UNDEFINED + u'\ufffe' # 0x0077 -> UNDEFINED + u'\u2017' # 0x0078 -> DOUBLE LOW LINE + u'`' # 0x0079 -> GRAVE ACCENT + u':' # 0x007a -> COLON + u'#' # 0x007b -> NUMBER SIGN + u'@' # 0x007c -> COMMERCIAL AT + u"'" # 0x007d -> APOSTROPHE + u'=' # 0x007e -> EQUALS SIGN + u'"' # 0x007f -> QUOTATION MARK + u'\ufffe' # 0x0080 -> UNDEFINED + u'a' # 0x0081 -> LATIN SMALL LETTER A + u'b' # 0x0082 -> LATIN SMALL LETTER B + u'c' # 0x0083 -> LATIN SMALL LETTER C + u'd' # 0x0084 -> LATIN SMALL LETTER D + u'e' # 0x0085 -> LATIN SMALL LETTER E + u'f' # 0x0086 -> LATIN SMALL LETTER F + u'g' # 0x0087 -> LATIN SMALL LETTER G + u'h' # 0x0088 -> LATIN SMALL LETTER H + u'i' # 0x0089 -> LATIN SMALL LETTER I + u'\xab' # 0x008a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x008b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\ufffe' # 0x008c -> UNDEFINED + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\xb1' # 0x008f -> PLUS-MINUS SIGN + u'\xb0' # 
0x0090 -> DEGREE SIGN + u'j' # 0x0091 -> LATIN SMALL LETTER J + u'k' # 0x0092 -> LATIN SMALL LETTER K + u'l' # 0x0093 -> LATIN SMALL LETTER L + u'm' # 0x0094 -> LATIN SMALL LETTER M + u'n' # 0x0095 -> LATIN SMALL LETTER N + u'o' # 0x0096 -> LATIN SMALL LETTER O + u'p' # 0x0097 -> LATIN SMALL LETTER P + u'q' # 0x0098 -> LATIN SMALL LETTER Q + u'r' # 0x0099 -> LATIN SMALL LETTER R + u'\ufffe' # 0x009a -> UNDEFINED + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\xb8' # 0x009d -> CEDILLA + u'\ufffe' # 0x009e -> UNDEFINED + u'\xa4' # 0x009f -> CURRENCY SIGN + u'\xb5' # 0x00a0 -> MICRO SIGN + u'~' # 0x00a1 -> TILDE + u's' # 0x00a2 -> LATIN SMALL LETTER S + u't' # 0x00a3 -> LATIN SMALL LETTER T + u'u' # 0x00a4 -> LATIN SMALL LETTER U + u'v' # 0x00a5 -> LATIN SMALL LETTER V + u'w' # 0x00a6 -> LATIN SMALL LETTER W + u'x' # 0x00a7 -> LATIN SMALL LETTER X + u'y' # 0x00a8 -> LATIN SMALL LETTER Y + u'z' # 0x00a9 -> LATIN SMALL LETTER Z + u'\ufffe' # 0x00aa -> UNDEFINED + u'\ufffe' # 0x00ab -> UNDEFINED + u'\ufffe' # 0x00ac -> UNDEFINED + u'\ufffe' # 0x00ad -> UNDEFINED + u'\ufffe' # 0x00ae -> UNDEFINED + u'\xae' # 0x00af -> REGISTERED SIGN + u'^' # 0x00b0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0x00b1 -> POUND SIGN + u'\xa5' # 0x00b2 -> YEN SIGN + u'\xb7' # 0x00b3 -> MIDDLE DOT + u'\xa9' # 0x00b4 -> COPYRIGHT SIGN + u'\xa7' # 0x00b5 -> SECTION SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xbc' # 0x00b7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00b8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00b9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0x00ba -> LEFT SQUARE BRACKET + u']' # 0x00bb -> RIGHT SQUARE BRACKET + u'\xaf' # 0x00bc -> MACRON + u'\xa8' # 0x00bd -> DIAERESIS + u'\xb4' # 0x00be -> ACUTE ACCENT + u'\xd7' # 0x00bf -> MULTIPLICATION SIGN + u'{' # 0x00c0 -> LEFT CURLY BRACKET + u'A' # 0x00c1 -> LATIN CAPITAL LETTER A + u'B' # 0x00c2 -> LATIN CAPITAL LETTER B + u'C' # 0x00c3 -> LATIN CAPITAL LETTER C + u'D' # 0x00c4 -> LATIN CAPITAL LETTER D + u'E' # 0x00c5 
-> LATIN CAPITAL LETTER E + u'F' # 0x00c6 -> LATIN CAPITAL LETTER F + u'G' # 0x00c7 -> LATIN CAPITAL LETTER G + u'H' # 0x00c8 -> LATIN CAPITAL LETTER H + u'I' # 0x00c9 -> LATIN CAPITAL LETTER I + u'\xad' # 0x00ca -> SOFT HYPHEN + u'\ufffe' # 0x00cb -> UNDEFINED + u'\ufffe' # 0x00cc -> UNDEFINED + u'\ufffe' # 0x00cd -> UNDEFINED + u'\ufffe' # 0x00ce -> UNDEFINED + u'\ufffe' # 0x00cf -> UNDEFINED + u'}' # 0x00d0 -> RIGHT CURLY BRACKET + u'J' # 0x00d1 -> LATIN CAPITAL LETTER J + u'K' # 0x00d2 -> LATIN CAPITAL LETTER K + u'L' # 0x00d3 -> LATIN CAPITAL LETTER L + u'M' # 0x00d4 -> LATIN CAPITAL LETTER M + u'N' # 0x00d5 -> LATIN CAPITAL LETTER N + u'O' # 0x00d6 -> LATIN CAPITAL LETTER O + u'P' # 0x00d7 -> LATIN CAPITAL LETTER P + u'Q' # 0x00d8 -> LATIN CAPITAL LETTER Q + u'R' # 0x00d9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0x00da -> SUPERSCRIPT ONE + u'\ufffe' # 0x00db -> UNDEFINED + u'\ufffe' # 0x00dc -> UNDEFINED + u'\ufffe' # 0x00dd -> UNDEFINED + u'\ufffe' # 0x00de -> UNDEFINED + u'\ufffe' # 0x00df -> UNDEFINED + u'\\' # 0x00e0 -> REVERSE SOLIDUS + u'\xf7' # 0x00e1 -> DIVISION SIGN + u'S' # 0x00e2 -> LATIN CAPITAL LETTER S + u'T' # 0x00e3 -> LATIN CAPITAL LETTER T + u'U' # 0x00e4 -> LATIN CAPITAL LETTER U + u'V' # 0x00e5 -> LATIN CAPITAL LETTER V + u'W' # 0x00e6 -> LATIN CAPITAL LETTER W + u'X' # 0x00e7 -> LATIN CAPITAL LETTER X + u'Y' # 0x00e8 -> LATIN CAPITAL LETTER Y + u'Z' # 0x00e9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0x00ea -> SUPERSCRIPT TWO + u'\ufffe' # 0x00eb -> UNDEFINED + u'\ufffe' # 0x00ec -> UNDEFINED + u'\ufffe' # 0x00ed -> UNDEFINED + u'\ufffe' # 0x00ee -> UNDEFINED + u'\ufffe' # 0x00ef -> UNDEFINED + u'0' # 0x00f0 -> DIGIT ZERO + u'1' # 0x00f1 -> DIGIT ONE + u'2' # 0x00f2 -> DIGIT TWO + u'3' # 0x00f3 -> DIGIT THREE + u'4' # 0x00f4 -> DIGIT FOUR + u'5' # 0x00f5 -> DIGIT FIVE + u'6' # 0x00f6 -> DIGIT SIX + u'7' # 0x00f7 -> DIGIT SEVEN + u'8' # 0x00f8 -> DIGIT EIGHT + u'9' # 0x00f9 -> DIGIT NINE + u'\xb3' # 0x00fa -> SUPERSCRIPT THREE + u'\ufffe' # 0x00fb 
-> UNDEFINED + u'\ufffe' # 0x00fc -> UNDEFINED + u'\ufffe' # 0x00fd -> UNDEFINED + u'\ufffe' # 0x00fe -> UNDEFINED + u'\x9f' # 0x00ff -> EIGHT ONES +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0037, # END OF TRANSMISSION + 0x0005: 0x002d, # ENQUIRY + 0x0006: 0x002e, # ACKNOWLEDGE + 0x0007: 0x002f, # BELL + 0x0008: 0x0016, # BACKSPACE + 0x0009: 0x0005, # HORIZONTAL TABULATION + 0x000a: 0x0025, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x003c, # DEVICE CONTROL FOUR + 0x0015: 0x003d, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0032, # SYNCHRONOUS IDLE + 0x0017: 0x0026, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x003f, # SUBSTITUTE + 0x001b: 0x0027, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0040, # SPACE + 0x0021: 0x005a, # EXCLAMATION MARK + 0x0022: 0x007f, # QUOTATION MARK + 0x0023: 0x007b, # NUMBER SIGN + 0x0024: 0x005b, # DOLLAR SIGN + 0x0025: 0x006c, # PERCENT SIGN + 0x0026: 0x0050, # AMPERSAND + 0x0027: 0x007d, # APOSTROPHE + 0x0028: 0x004d, # LEFT PARENTHESIS + 0x0029: 0x005d, # RIGHT PARENTHESIS + 0x002a: 0x005c, # ASTERISK + 0x002b: 0x004e, # PLUS SIGN + 0x002c: 0x006b, # COMMA + 0x002d: 0x0060, # HYPHEN-MINUS + 0x002e: 0x004b, # FULL STOP + 0x002f: 0x0061, # SOLIDUS + 0x0030: 0x00f0, # DIGIT ZERO + 0x0031: 0x00f1, # DIGIT ONE + 0x0032: 0x00f2, # DIGIT TWO + 0x0033: 0x00f3, # DIGIT THREE + 0x0034: 0x00f4, # 
DIGIT FOUR + 0x0035: 0x00f5, # DIGIT FIVE + 0x0036: 0x00f6, # DIGIT SIX + 0x0037: 0x00f7, # DIGIT SEVEN + 0x0038: 0x00f8, # DIGIT EIGHT + 0x0039: 0x00f9, # DIGIT NINE + 0x003a: 0x007a, # COLON + 0x003b: 0x005e, # SEMICOLON + 0x003c: 0x004c, # LESS-THAN SIGN + 0x003d: 0x007e, # EQUALS SIGN + 0x003e: 0x006e, # GREATER-THAN SIGN + 0x003f: 0x006f, # QUESTION MARK + 0x0040: 0x007c, # COMMERCIAL AT + 0x0041: 0x00c1, # LATIN CAPITAL LETTER A + 0x0042: 0x00c2, # LATIN CAPITAL LETTER B + 0x0043: 0x00c3, # LATIN CAPITAL LETTER C + 0x0044: 0x00c4, # LATIN CAPITAL LETTER D + 0x0045: 0x00c5, # LATIN CAPITAL LETTER E + 0x0046: 0x00c6, # LATIN CAPITAL LETTER F + 0x0047: 0x00c7, # LATIN CAPITAL LETTER G + 0x0048: 0x00c8, # LATIN CAPITAL LETTER H + 0x0049: 0x00c9, # LATIN CAPITAL LETTER I + 0x004a: 0x00d1, # LATIN CAPITAL LETTER J + 0x004b: 0x00d2, # LATIN CAPITAL LETTER K + 0x004c: 0x00d3, # LATIN CAPITAL LETTER L + 0x004d: 0x00d4, # LATIN CAPITAL LETTER M + 0x004e: 0x00d5, # LATIN CAPITAL LETTER N + 0x004f: 0x00d6, # LATIN CAPITAL LETTER O + 0x0050: 0x00d7, # LATIN CAPITAL LETTER P + 0x0051: 0x00d8, # LATIN CAPITAL LETTER Q + 0x0052: 0x00d9, # LATIN CAPITAL LETTER R + 0x0053: 0x00e2, # LATIN CAPITAL LETTER S + 0x0054: 0x00e3, # LATIN CAPITAL LETTER T + 0x0055: 0x00e4, # LATIN CAPITAL LETTER U + 0x0056: 0x00e5, # LATIN CAPITAL LETTER V + 0x0057: 0x00e6, # LATIN CAPITAL LETTER W + 0x0058: 0x00e7, # LATIN CAPITAL LETTER X + 0x0059: 0x00e8, # LATIN CAPITAL LETTER Y + 0x005a: 0x00e9, # LATIN CAPITAL LETTER Z + 0x005b: 0x00ba, # LEFT SQUARE BRACKET + 0x005c: 0x00e0, # REVERSE SOLIDUS + 0x005d: 0x00bb, # RIGHT SQUARE BRACKET + 0x005e: 0x00b0, # CIRCUMFLEX ACCENT + 0x005f: 0x006d, # LOW LINE + 0x0060: 0x0079, # GRAVE ACCENT + 0x0061: 0x0081, # LATIN SMALL LETTER A + 0x0062: 0x0082, # LATIN SMALL LETTER B + 0x0063: 0x0083, # LATIN SMALL LETTER C + 0x0064: 0x0084, # LATIN SMALL LETTER D + 0x0065: 0x0085, # LATIN SMALL LETTER E + 0x0066: 0x0086, # LATIN SMALL LETTER F + 0x0067: 0x0087, # 
LATIN SMALL LETTER G + 0x0068: 0x0088, # LATIN SMALL LETTER H + 0x0069: 0x0089, # LATIN SMALL LETTER I + 0x006a: 0x0091, # LATIN SMALL LETTER J + 0x006b: 0x0092, # LATIN SMALL LETTER K + 0x006c: 0x0093, # LATIN SMALL LETTER L + 0x006d: 0x0094, # LATIN SMALL LETTER M + 0x006e: 0x0095, # LATIN SMALL LETTER N + 0x006f: 0x0096, # LATIN SMALL LETTER O + 0x0070: 0x0097, # LATIN SMALL LETTER P + 0x0071: 0x0098, # LATIN SMALL LETTER Q + 0x0072: 0x0099, # LATIN SMALL LETTER R + 0x0073: 0x00a2, # LATIN SMALL LETTER S + 0x0074: 0x00a3, # LATIN SMALL LETTER T + 0x0075: 0x00a4, # LATIN SMALL LETTER U + 0x0076: 0x00a5, # LATIN SMALL LETTER V + 0x0077: 0x00a6, # LATIN SMALL LETTER W + 0x0078: 0x00a7, # LATIN SMALL LETTER X + 0x0079: 0x00a8, # LATIN SMALL LETTER Y + 0x007a: 0x00a9, # LATIN SMALL LETTER Z + 0x007b: 0x00c0, # LEFT CURLY BRACKET + 0x007c: 0x004f, # VERTICAL LINE + 0x007d: 0x00d0, # RIGHT CURLY BRACKET + 0x007e: 0x00a1, # TILDE + 0x007f: 0x0007, # DELETE + 0x0080: 0x0020, # DIGIT SELECT + 0x0081: 0x0021, # START OF SIGNIFICANCE + 0x0082: 0x0022, # FIELD SEPARATOR + 0x0083: 0x0023, # WORD UNDERSCORE + 0x0084: 0x0024, # BYPASS OR INHIBIT PRESENTATION + 0x0085: 0x0015, # NEW LINE + 0x0086: 0x0006, # REQUIRED NEW LINE + 0x0087: 0x0017, # PROGRAM OPERATOR COMMUNICATION + 0x0088: 0x0028, # SET ATTRIBUTE + 0x0089: 0x0029, # START FIELD EXTENDED + 0x008a: 0x002a, # SET MODE OR SWITCH + 0x008b: 0x002b, # CONTROL SEQUENCE PREFIX + 0x008c: 0x002c, # MODIFY FIELD ATTRIBUTE + 0x008d: 0x0009, # SUPERSCRIPT + 0x008e: 0x000a, # REPEAT + 0x008f: 0x001b, # CUSTOMER USE ONE + 0x0090: 0x0030, # + 0x0091: 0x0031, # + 0x0092: 0x001a, # UNIT BACK SPACE + 0x0093: 0x0033, # INDEX RETURN + 0x0094: 0x0034, # PRESENTATION POSITION + 0x0095: 0x0035, # TRANSPARENT + 0x0096: 0x0036, # NUMERIC BACKSPACE + 0x0097: 0x0008, # GRAPHIC ESCAPE + 0x0098: 0x0038, # SUBSCRIPT + 0x0099: 0x0039, # INDENT TABULATION + 0x009a: 0x003a, # REVERSE FORM FEED + 0x009b: 0x003b, # CUSTOMER USE THREE + 0x009c: 0x0004, # 
SELECT + 0x009d: 0x0014, # RESTORE/ENABLE PRESENTATION + 0x009e: 0x003e, # + 0x009f: 0x00ff, # EIGHT ONES + 0x00a0: 0x0074, # NO-BREAK SPACE + 0x00a2: 0x004a, # CENT SIGN + 0x00a3: 0x00b1, # POUND SIGN + 0x00a4: 0x009f, # CURRENCY SIGN + 0x00a5: 0x00b2, # YEN SIGN + 0x00a6: 0x006a, # BROKEN BAR + 0x00a7: 0x00b5, # SECTION SIGN + 0x00a8: 0x00bd, # DIAERESIS + 0x00a9: 0x00b4, # COPYRIGHT SIGN + 0x00ab: 0x008a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x005f, # NOT SIGN + 0x00ad: 0x00ca, # SOFT HYPHEN + 0x00ae: 0x00af, # REGISTERED SIGN + 0x00af: 0x00bc, # MACRON + 0x00b0: 0x0090, # DEGREE SIGN + 0x00b1: 0x008f, # PLUS-MINUS SIGN + 0x00b2: 0x00ea, # SUPERSCRIPT TWO + 0x00b3: 0x00fa, # SUPERSCRIPT THREE + 0x00b4: 0x00be, # ACUTE ACCENT + 0x00b5: 0x00a0, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b3, # MIDDLE DOT + 0x00b8: 0x009d, # CEDILLA + 0x00b9: 0x00da, # SUPERSCRIPT ONE + 0x00bb: 0x008b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00b7, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00b8, # VULGAR FRACTION ONE HALF + 0x00be: 0x00b9, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x00bf, # MULTIPLICATION SIGN + 0x00f7: 0x00e1, # DIVISION SIGN + 0x05d0: 0x0041, # HEBREW LETTER ALEF + 0x05d1: 0x0042, # HEBREW LETTER BET + 0x05d2: 0x0043, # HEBREW LETTER GIMEL + 0x05d3: 0x0044, # HEBREW LETTER DALET + 0x05d4: 0x0045, # HEBREW LETTER HE + 0x05d5: 0x0046, # HEBREW LETTER VAV + 0x05d6: 0x0047, # HEBREW LETTER ZAYIN + 0x05d7: 0x0048, # HEBREW LETTER HET + 0x05d8: 0x0049, # HEBREW LETTER TET + 0x05d9: 0x0051, # HEBREW LETTER YOD + 0x05da: 0x0052, # HEBREW LETTER FINAL KAF + 0x05db: 0x0053, # HEBREW LETTER KAF + 0x05dc: 0x0054, # HEBREW LETTER LAMED + 0x05dd: 0x0055, # HEBREW LETTER FINAL MEM + 0x05de: 0x0056, # HEBREW LETTER MEM + 0x05df: 0x0057, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0058, # HEBREW LETTER NUN + 0x05e1: 0x0059, # HEBREW LETTER SAMEKH + 0x05e2: 0x0062, # HEBREW LETTER AYIN + 0x05e3: 0x0063, # HEBREW LETTER FINAL PE + 0x05e4: 
0x0064, # HEBREW LETTER PE + 0x05e5: 0x0065, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0066, # HEBREW LETTER TSADI + 0x05e7: 0x0067, # HEBREW LETTER QOF + 0x05e8: 0x0068, # HEBREW LETTER RESH + 0x05e9: 0x0069, # HEBREW LETTER SHIN + 0x05ea: 0x0071, # HEBREW LETTER TAV + 0x2017: 0x0078, # DOUBLE LOW LINE +} \ No newline at end of file Index: cp437.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp437.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp437.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp437.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 
0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS 
VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, 
# BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER 
A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN 
SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK 
SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING 
ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 
0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 
+ u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF 
HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 
0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL 
LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 
0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 
0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE 
AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp737.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp737.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp737.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp737.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA - 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA - 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA - 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x008b: 0x039c, # GREEK CAPITAL LETTER MU - 0x008c: 0x039d, # GREEK CAPITAL LETTER NU - 0x008d: 0x039e, # GREEK CAPITAL LETTER XI - 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI - 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x0099: 0x03b2, # GREEK SMALL LETTER BETA - 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA - 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA - 0x009e: 0x03b7, # GREEK SMALL LETTER ETA - 0x009f: 0x03b8, # GREEK SMALL LETTER THETA - 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00a3: 0x03bc, # GREEK SMALL LETTER MU - 0x00a4: 0x03bd, # 
GREEK SMALL LETTER NU - 0x00a5: 0x03be, # GREEK SMALL LETTER XI - 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00a7: 0x03c0, # GREEK SMALL LETTER PI - 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO - 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI - 0x00af: 0x03c8, # GREEK SMALL LETTER PSI - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 
0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON 
WITH TONOS - 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA + 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA + 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA + 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x008b: 0x039c, # GREEK CAPITAL LETTER MU + 0x008c: 0x039d, # GREEK CAPITAL LETTER NU + 0x008d: 0x039e, # GREEK CAPITAL LETTER XI + 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI + 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x0099: 0x03b2, # GREEK SMALL LETTER BETA + 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x009b: 0x03b4, # 
GREEK SMALL LETTER DELTA + 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA + 0x009e: 0x03b7, # GREEK SMALL LETTER ETA + 0x009f: 0x03b8, # GREEK SMALL LETTER THETA + 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00a3: 0x03bc, # GREEK SMALL LETTER MU + 0x00a4: 0x03bd, # GREEK SMALL LETTER NU + 0x00a5: 0x03be, # GREEK SMALL LETTER XI + 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00a7: 0x03c0, # GREEK SMALL LETTER PI + 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO + 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI + 0x00af: 0x03c8, # GREEK SMALL LETTER PSI + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX 
DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 
0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> 
SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO + u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x009c -> 
GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS 
LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA + u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA 
WITH TONOS + u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 
0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # 
LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W 
+ 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00f7: 0x00f6, # DIVISION SIGN + 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA + 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA + 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA + 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x008b, # GREEK CAPITAL LETTER MU + 0x039d: 0x008c, # GREEK CAPITAL LETTER NU + 0x039e: 0x008d, # GREEK CAPITAL LETTER XI + 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI + 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x0097, # GREEK CAPITAL 
LETTER OMEGA + 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x0099, # GREEK SMALL LETTER BETA + 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA + 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA + 0x03b7: 0x009e, # GREEK SMALL LETTER ETA + 0x03b8: 0x009f, # GREEK SMALL LETTER THETA + 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00a3, # GREEK SMALL LETTER MU + 0x03bd: 0x00a4, # GREEK SMALL LETTER NU + 0x03be: 0x00a5, # GREEK SMALL LETTER XI + 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00a7, # GREEK SMALL LETTER PI + 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO + 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU + 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI + 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI + 0x03c8: 0x00af, # GREEK SMALL LETTER PSI + 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR 
EQUAL TO + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, 
# BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp775.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp775.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp775.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp775.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0096: 0x00a2, # CENT SIGN - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 
0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a7: 0x00a6, # BROKEN BAR - 0x00a8: 0x00a9, # COPYRIGHT SIGN - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00c7: 0x016a, # LATIN CAPITAL 
LETTER U WITH MACRON - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ee: 0x0145, # LATIN 
CAPITAL LETTER N WITH CEDILLA - 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0096: 0x00a2, # CENT SIGN + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 
0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a7: 0x00a6, # BROKEN BAR + 0x00a8: 0x00a9, # COPYRIGHT SIGN + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND 
HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e8: 0x0136, # LATIN 
CAPITAL LETTER K WITH CEDILLA + 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' 
# 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON + u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA + u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\xa2' # 0x0096 -> CENT SIGN + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN 
CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\xa4' # 0x009f -> CURRENCY SIGN + u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE + u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK + u'\xa6' # 0x00a7 -> BROKEN BAR + u'\xa9' # 0x00a8 -> COPYRIGHT SIGN + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND 
LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON + u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON + u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK + u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON + u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0143' # 0x00e3 -> LATIN 
CAPITAL LETTER N WITH ACUTE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA + u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA + u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # 
DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER 
J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT 
CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x0096, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x009f, # CURRENCY SIGN + 0x00a6: 0x00a7, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a9: 0x00a8, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION 
SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA + 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x0097, # LATIN CAPITAL 
LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK + 0x2219: 0x00f9, # BULLET OPERATOR + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # 
BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp850.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp850.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp850.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp850.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH 
TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 
0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL 
LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 
0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN 
CAPITAL LETTER U WITH GRAVE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH 
CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH + u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER 
E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN + u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2017' # 0x00f2 -> DOUBLE LOW LINE + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 
0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # 
DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # 
LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS 
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x2017: 0x00f2, # DOUBLE LOW LINE + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS 
LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp852.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp852.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp852.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp852.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x009e: 
0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT 
HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x0155, # LATIN SMALL 
LETTER R WITH ACUTE - 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00f2: 0x02db, # OGONEK - 0x00f3: 0x02c7, # CARON - 0x00f4: 0x02d8, # BREVE - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH 
DIAERESIS + 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS 
DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 
0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00f2: 0x02db, # OGONEK + 0x00f3: 0x02c7, # CARON + 0x00f4: 0x02d8, # BREVE + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> 
DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON + u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN 
SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON + u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON + u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK + u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON + u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK + u'\xac' # 0x00aa -> NOT SIGN + u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE + u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON + u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON + u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + 
u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE + u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON + u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + 
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON + u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON + u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE + u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00f2 -> OGONEK + u'\u02c7' # 0x00f3 -> CARON + u'\u02d8' # 0x00f4 -> BREVE + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON + u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 
0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL 
LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 
0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b8: 0x00f7, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 
0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00ea, # LATIN SMALL 
LETTER R WITH ACUTE + 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00f3, # CARON + 0x02d8: 0x00f4, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02db: 0x00f2, # OGONEK + 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # 
BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp855.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp855.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp855.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp855.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO - 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI - 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE - 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU - 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0410, # CYRILLIC CAPITAL 
LETTER A - 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # 
BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00ef: 0x2116, # NUMERO SIGN - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00f5: 0x0448, # 
CYRILLIC SMALL LETTER SHA - 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E - 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO + 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI + 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE + 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU + 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x009f: 
0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA + 
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00ef: 0x2116, # NUMERO SIGN + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00f2: 0x042b, # CYRILLIC CAPITAL 
LETTER YERU + 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E + 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE + u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE + u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE + u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE + u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO + u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO + u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE + u'\u0405' # 0x0089 -> CYRILLIC 
CAPITAL LETTER DZE + u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI + u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI + u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE + u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE + u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE + u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE + u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE + u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE + u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE + u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE + u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE + u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE + u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U + u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE + u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE + u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU + u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU + u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A + u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE + u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE + u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE + u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE + u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE + u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE + u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE + u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE + u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF + u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF + u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE + u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> 
RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA + u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA + u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I + u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I + u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA + u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL + u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL + u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM + u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM + u'\u043d' # 
0x00d4 -> CYRILLIC SMALL LETTER EN + u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN + u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O + u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O + u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE + u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA + u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER + u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER + u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES + u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES + u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE + u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE + u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U + u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U + u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE + u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE + u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE + u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE + u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u2116' # 0x00ef -> NUMERO SIGN + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU + u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU + u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE + u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE + u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA + u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA + u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E + u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE + u'\u0427' # 0x00fc -> CYRILLIC CAPITAL 
LETTER CHE + u'\xa7' # 0x00fd -> SECTION SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # 
DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 
0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00fd, # SECTION SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE + 
0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I + 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00d2, # 
CYRILLIC SMALL LETTER EM + 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O + 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U + 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E + 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA + 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO + 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI + 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE + 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0x00ef, # NUMERO SIGN + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX 
DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp856.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp856.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp856.py 8 Aug 2002 20:19:19 -0000 1.5 +++ cp856.py 21 Oct 2005 13:49:12 -0000 1.6 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP856.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,613 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: None, # UNDEFINED - 0x009c: 0x00a3, # POUND SIGN - 0x009d: None, # UNDEFINED - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: None, # UNDEFINED - 0x00a0: None, # UNDEFINED - 0x00a1: None, # UNDEFINED - 0x00a2: None, # UNDEFINED - 0x00a3: None, # UNDEFINED - 0x00a4: None, # UNDEFINED - 0x00a5: None, # UNDEFINED - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: None, # UNDEFINED - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: None, # UNDEFINED 
- 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: None, # UNDEFINED - 0x00b6: None, # UNDEFINED - 0x00b7: None, # UNDEFINED - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: None, # UNDEFINED - 0x00c7: None, # UNDEFINED - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: None, # UNDEFINED - 0x00d1: None, # UNDEFINED - 0x00d2: None, # UNDEFINED - 0x00d3: None, # UNDEFINEDS - 0x00d4: None, # UNDEFINED - 0x00d5: None, # UNDEFINED - 0x00d6: None, # UNDEFINEDE - 0x00d7: None, # UNDEFINED - 0x00d8: None, # UNDEFINED - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 
0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: None, # UNDEFINED - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: None, # UNDEFINED - 0x00e1: None, # UNDEFINED - 0x00e2: None, # UNDEFINED - 0x00e3: None, # UNDEFINED - 0x00e4: None, # UNDEFINED - 0x00e5: None, # UNDEFINED - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: None, # UNDEFINED - 0x00e9: None, # UNDEFINED - 0x00ea: None, # UNDEFINED - 0x00eb: None, # UNDEFINED - 0x00ec: None, # UNDEFINED - 0x00ed: None, # UNDEFINED - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER 
FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: None, # UNDEFINED + 0x009c: 0x00a3, # POUND SIGN + 0x009d: None, # UNDEFINED + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: None, # UNDEFINED + 0x00a0: None, # UNDEFINED + 0x00a1: None, # UNDEFINED + 0x00a2: None, # UNDEFINED + 0x00a3: None, # UNDEFINED + 0x00a4: None, # UNDEFINED + 0x00a5: None, # UNDEFINED + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: None, # UNDEFINED + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: None, # UNDEFINED + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: None, # UNDEFINED + 0x00b6: None, # UNDEFINED + 0x00b7: None, # UNDEFINED + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: None, # UNDEFINED + 0x00c7: None, # UNDEFINED + 0x00c8: 0x255a, # BOX DRAWINGS 
DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: None, # UNDEFINED + 0x00d1: None, # UNDEFINED + 0x00d2: None, # UNDEFINED + 0x00d3: None, # UNDEFINEDS + 0x00d4: None, # UNDEFINED + 0x00d5: None, # UNDEFINED + 0x00d6: None, # UNDEFINEDE + 0x00d7: None, # UNDEFINED + 0x00d8: None, # UNDEFINED + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: None, # UNDEFINED + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: None, # UNDEFINED + 0x00e1: None, # UNDEFINED + 0x00e2: None, # UNDEFINED + 0x00e3: None, # UNDEFINED + 0x00e4: None, # UNDEFINED + 0x00e5: None, # UNDEFINED + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: None, # UNDEFINED + 0x00e9: None, # UNDEFINED + 0x00ea: None, # UNDEFINED + 0x00eb: None, # UNDEFINED + 0x00ec: None, # UNDEFINED + 0x00ed: None, # UNDEFINED + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> 
NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0081 -> HEBREW LETTER BET + u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0083 -> HEBREW LETTER DALET + u'\u05d4' # 0x0084 -> HEBREW LETTER HE + u'\u05d5' # 0x0085 -> HEBREW LETTER VAV + u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0087 -> HEBREW LETTER HET + u'\u05d8' # 0x0088 -> HEBREW LETTER TET + u'\u05d9' # 0x0089 -> HEBREW LETTER YOD + u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x008b -> HEBREW LETTER KAF + u'\u05dc' # 0x008c -> HEBREW LETTER LAMED + u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x008e -> HEBREW LETTER MEM + u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0090 -> HEBREW LETTER NUN + u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0094 -> HEBREW LETTER PE + 
u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0097 -> HEBREW LETTER QOF + u'\u05e8' # 0x0098 -> HEBREW LETTER RESH + u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN + u'\u05ea' # 0x009a -> HEBREW LETTER TAV + u'\ufffe' # 0x009b -> UNDEFINED + u'\xa3' # 0x009c -> POUND SIGN + u'\ufffe' # 0x009d -> UNDEFINED + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\ufffe' # 0x009f -> UNDEFINED + u'\ufffe' # 0x00a0 -> UNDEFINED + u'\ufffe' # 0x00a1 -> UNDEFINED + u'\ufffe' # 0x00a2 -> UNDEFINED + u'\ufffe' # 0x00a3 -> UNDEFINED + u'\ufffe' # 0x00a4 -> UNDEFINED + u'\ufffe' # 0x00a5 -> UNDEFINED + u'\ufffe' # 0x00a6 -> UNDEFINED + u'\ufffe' # 0x00a7 -> UNDEFINED + u'\ufffe' # 0x00a8 -> UNDEFINED + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\ufffe' # 0x00ad -> UNDEFINED + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\ufffe' # 0x00b5 -> UNDEFINED + u'\ufffe' # 0x00b6 -> UNDEFINED + u'\ufffe' # 0x00b7 -> UNDEFINED + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS 
LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\ufffe' # 0x00c6 -> UNDEFINED + u'\ufffe' # 0x00c7 -> UNDEFINED + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\ufffe' # 0x00d0 -> UNDEFINED + u'\ufffe' # 0x00d1 -> UNDEFINED + u'\ufffe' # 0x00d2 -> UNDEFINED + u'\ufffe' # 0x00d3 -> UNDEFINEDS + u'\ufffe' # 0x00d4 -> UNDEFINED + u'\ufffe' # 0x00d5 -> UNDEFINED + u'\ufffe' # 0x00d6 -> UNDEFINEDE + u'\ufffe' # 0x00d7 -> UNDEFINED + u'\ufffe' # 0x00d8 -> UNDEFINED + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\ufffe' # 0x00de -> UNDEFINED + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\ufffe' # 0x00e0 -> UNDEFINED + u'\ufffe' # 0x00e1 -> UNDEFINED + u'\ufffe' # 0x00e2 -> UNDEFINED + u'\ufffe' # 0x00e3 -> UNDEFINED + u'\ufffe' # 0x00e4 -> UNDEFINED + u'\ufffe' # 0x00e5 -> UNDEFINED + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\ufffe' # 0x00e7 -> UNDEFINED + u'\ufffe' # 0x00e8 -> UNDEFINED + u'\ufffe' # 0x00e9 -> UNDEFINED + u'\ufffe' # 0x00ea -> UNDEFINED + u'\ufffe' # 0x00eb -> UNDEFINED + u'\ufffe' # 0x00ec -> UNDEFINED + u'\ufffe' # 0x00ed -> UNDEFINED + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2017' # 0x00f2 -> DOUBLE LOW LINE + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 
0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN 
+ 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 
0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # 
CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00f7: 0x00f6, # DIVISION SIGN + 0x05d0: 0x0080, # HEBREW LETTER ALEF + 0x05d1: 0x0081, # HEBREW LETTER BET + 0x05d2: 0x0082, # HEBREW LETTER GIMEL + 0x05d3: 0x0083, # HEBREW LETTER DALET + 0x05d4: 0x0084, # HEBREW LETTER HE + 0x05d5: 0x0085, # HEBREW LETTER VAV + 0x05d6: 0x0086, # HEBREW LETTER ZAYIN + 0x05d7: 0x0087, # HEBREW LETTER HET + 0x05d8: 0x0088, # HEBREW LETTER TET + 0x05d9: 0x0089, # HEBREW LETTER YOD + 0x05da: 0x008a, # HEBREW LETTER FINAL KAF + 0x05db: 0x008b, # HEBREW LETTER KAF + 0x05dc: 0x008c, # HEBREW LETTER LAMED + 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM + 0x05de: 0x008e, # HEBREW LETTER MEM + 0x05df: 0x008f, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0090, # HEBREW LETTER NUN + 0x05e1: 0x0091, # HEBREW LETTER SAMEKH + 0x05e2: 0x0092, # HEBREW LETTER AYIN + 0x05e3: 0x0093, # HEBREW LETTER FINAL PE + 0x05e4: 0x0094, # HEBREW LETTER PE + 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0096, # HEBREW LETTER TSADI + 0x05e7: 0x0097, # HEBREW LETTER QOF + 0x05e8: 0x0098, # HEBREW LETTER RESH + 0x05e9: 0x0099, # HEBREW LETTER SHIN + 0x05ea: 0x009a, # HEBREW LETTER TAV + 0x2017: 0x00f2, # DOUBLE LOW LINE + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX 
DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp857.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp857.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp857.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp857.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,135 +32,650 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH 
CEDILLA - 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN 
CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: None, # UNDEFINED - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: 0x00d7, # MULTIPLICATION SIGN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 
0x00f2: None, # UNDEFINED - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH 
STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: None, # UNDEFINED + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: 0x00d7, # MULTIPLICATION SIGN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH 
DIAERESIS + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: None, # UNDEFINED + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH 
CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + 
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR + u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH 
CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\ufffe' # 0x00d5 -> UNDEFINED + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\ufffe' # 0x00e7 -> UNDEFINED + u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\ufffe' # 0x00f2 -> UNDEFINED + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + 
u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 
0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 
0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE 
QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00e8, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL 
LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 
0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp860.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp860.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp860.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp860.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x00d3, # LATIN CAPITAL LETTER O 
WITH ACUTE - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 
0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 
0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 
0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> 
SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd5' # 
0x0099 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' 
# 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 
0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN 
CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 
0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, 
# LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS 
LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE 
AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp861.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp861.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp861.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp861.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH - 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00fe, # LATIN SMALL LETTER THORN - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX 
DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # 
INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x008c: 0x00f0, # LATIN SMALL LETTER ETH + 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00fe, # LATIN SMALL LETTER THORN + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 
0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX 
DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK 
SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' 
# 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH + u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH 
DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 
0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> 
GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # 
DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 
0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN 
SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH + 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN 
SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f0: 0x008c, # LATIN SMALL LETTER ETH + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x0095, # LATIN SMALL LETTER THORN + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # 
BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 
0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp862.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp862.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp862.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp862.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 
0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE 
HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST 
EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 
0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX 
DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE 
SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0081 -> HEBREW LETTER BET + u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0083 -> HEBREW LETTER DALET + u'\u05d4' # 0x0084 -> HEBREW LETTER HE + u'\u05d5' # 0x0085 -> HEBREW LETTER VAV + u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0087 -> HEBREW LETTER HET + u'\u05d8' # 0x0088 -> HEBREW LETTER TET + u'\u05d9' # 0x0089 -> HEBREW LETTER YOD + u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x008b -> HEBREW LETTER KAF + u'\u05dc' # 0x008c -> HEBREW LETTER LAMED + u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x008e -> HEBREW LETTER MEM + u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0090 -> HEBREW LETTER NUN + u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0094 -> HEBREW LETTER PE + 
u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0097 -> HEBREW LETTER QOF + u'\u05e8' # 0x0098 -> HEBREW LETTER RESH + u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN + u'\u05ea' # 0x009a -> HEBREW LETTER TAV + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 
0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd 
-> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 
0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER 
F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, 
# LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 
0x00ed, # GREEK SMALL LETTER PHI + 0x05d0: 0x0080, # HEBREW LETTER ALEF + 0x05d1: 0x0081, # HEBREW LETTER BET + 0x05d2: 0x0082, # HEBREW LETTER GIMEL + 0x05d3: 0x0083, # HEBREW LETTER DALET + 0x05d4: 0x0084, # HEBREW LETTER HE + 0x05d5: 0x0085, # HEBREW LETTER VAV + 0x05d6: 0x0086, # HEBREW LETTER ZAYIN + 0x05d7: 0x0087, # HEBREW LETTER HET + 0x05d8: 0x0088, # HEBREW LETTER TET + 0x05d9: 0x0089, # HEBREW LETTER YOD + 0x05da: 0x008a, # HEBREW LETTER FINAL KAF + 0x05db: 0x008b, # HEBREW LETTER KAF + 0x05dc: 0x008c, # HEBREW LETTER LAMED + 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM + 0x05de: 0x008e, # HEBREW LETTER MEM + 0x05df: 0x008f, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0090, # HEBREW LETTER NUN + 0x05e1: 0x0091, # HEBREW LETTER SAMEKH + 0x05e2: 0x0092, # HEBREW LETTER AYIN + 0x05e3: 0x0093, # HEBREW LETTER FINAL PE + 0x05e4: 0x0094, # HEBREW LETTER PE + 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0096, # HEBREW LETTER TSADI + 0x05e7: 0x0097, # HEBREW LETTER QOF + 0x05e8: 0x0098, # HEBREW LETTER RESH + 0x05e9: 0x0099, # HEBREW LETTER SHIN + 0x05ea: 0x009a, # HEBREW LETTER TAV + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT 
VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK 
+ 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp863.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp863.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp863.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp863.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00b6, # PILCROW SIGN - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 
0x2017, # DOUBLE LOW LINE - 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x008f: 0x00a7, # SECTION SIGN - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00a4, # CURRENCY SIGN - 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00a6, # BROKEN BAR - 0x00a1: 0x00b4, # ACUTE ACCENT - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00a8, # DIAERESIS - 0x00a5: 0x00b8, # CEDILLA - 0x00a6: 0x00b3, # SUPERSCRIPT THREE - 0x00a7: 0x00af, # MACRON - 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE 
AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, 
# BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00b6, # PILCROW SIGN + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER 
E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x2017, # DOUBLE LOW LINE + 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x008f: 0x00a7, # SECTION SIGN + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00a4, # CURRENCY SIGN + 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00a6, # BROKEN BAR + 0x00a1: 0x00b4, # ACUTE ACCENT + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00a8, # DIAERESIS + 0x00a5: 0x00b8, # CEDILLA + 0x00a6: 0x00b3, # SUPERSCRIPT THREE + 0x00a7: 0x00af, # MACRON + 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX 
DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX 
DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 
0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xb6' # 0x0086 -> PILCROW SIGN + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u2017' # 0x008d -> DOUBLE LOW LINE + u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa7' # 0x008f -> SECTION SIGN + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xa4' # 0x0098 -> CURRENCY SIGN + u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xdc' 
# 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xa6' # 0x00a0 -> BROKEN BAR + u'\xb4' # 0x00a1 -> ACUTE ACCENT + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xa8' # 0x00a4 -> DIAERESIS + u'\xb8' # 0x00a5 -> CEDILLA + u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE + u'\xaf' # 0x00a7 -> MACRON + u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX 
DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + 
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE 
CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER 
L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET 
+ 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x0098, # CURRENCY SIGN + 0x00a6: 0x00a0, # BROKEN BAR + 0x00a7: 0x008f, # SECTION SIGN + 0x00a8: 0x00a4, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00af: 0x00a7, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00a6, # SUPERSCRIPT THREE + 0x00b4: 0x00a1, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x0086, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00a5, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS + 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E 
WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x2017: 0x008d, # DOUBLE LOW LINE + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 
+ 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp864.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp864.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp864.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp864.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,134 +32,646 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0025: 0x066a, # ARABIC PERCENT SIGN - 0x0080: 0x00b0, # DEGREE SIGN - 0x0081: 0x00b7, # MIDDLE DOT - 0x0082: 0x2219, # BULLET OPERATOR - 0x0083: 0x221a, # SQUARE ROOT - 0x0084: 0x2592, # MEDIUM SHADE - 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL - 0x0086: 0x2502, # FORMS LIGHT VERTICAL - 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT - 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL - 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT - 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL - 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT - 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT - 0x008e: 0x2514, # FORMS LIGHT 
UP AND RIGHT - 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT - 0x0090: 0x03b2, # GREEK SMALL BETA - 0x0091: 0x221e, # INFINITY - 0x0092: 0x03c6, # GREEK SMALL PHI - 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN - 0x0094: 0x00bd, # FRACTION 1/2 - 0x0095: 0x00bc, # FRACTION 1/4 - 0x0096: 0x2248, # ALMOST EQUAL TO - 0x0097: 0x00ab, # LEFT POINTING GUILLEMET - 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET - 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM - 0x009f: None, # UNDEFINED - 0x00a1: 0x00ad, # SOFT HYPHEN - 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO - 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE - 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN 
ISOLATED FORM - 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x00a2, # CENT SIGN - 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00db: 0x00a6, # BROKEN VERTICAL BAR - 0x00dc: 0x00ac, # NOT SIGN - 0x00dd: 0x00f7, # DIVISION SIGN - 0x00de: 0x00d7, # MULTIPLICATION SIGN - 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 
0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM - 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM - 0x00f1: 0x0651, # ARABIC SHADDAH - 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM - 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM - 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: None, # UNDEFINED + 0x0025: 0x066a, # ARABIC PERCENT SIGN + 0x0080: 0x00b0, # DEGREE SIGN + 0x0081: 0x00b7, # MIDDLE DOT + 0x0082: 0x2219, # BULLET OPERATOR + 0x0083: 0x221a, # SQUARE ROOT + 0x0084: 0x2592, # MEDIUM SHADE + 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL + 0x0086: 0x2502, # FORMS LIGHT VERTICAL + 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT + 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL + 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT + 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL + 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT + 
0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT + 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT + 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT + 0x0090: 0x03b2, # GREEK SMALL BETA + 0x0091: 0x221e, # INFINITY + 0x0092: 0x03c6, # GREEK SMALL PHI + 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN + 0x0094: 0x00bd, # FRACTION 1/2 + 0x0095: 0x00bc, # FRACTION 1/4 + 0x0096: 0x2248, # ALMOST EQUAL TO + 0x0097: 0x00ab, # LEFT POINTING GUILLEMET + 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET + 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM + 0x009f: None, # UNDEFINED + 0x00a1: 0x00ad, # SOFT HYPHEN + 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO + 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE + 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO + 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE + 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR + 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE + 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX + 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN + 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT + 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE + 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: 
0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x00a2, # CENT SIGN + 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00db: 0x00a6, # BROKEN VERTICAL BAR + 0x00dc: 0x00ac, # NOT SIGN + 0x00dd: 0x00f7, # DIVISION SIGN + 0x00de: 0x00d7, # MULTIPLICATION SIGN + 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfedb, # 
ARABIC LETTER KAF INITIAL FORM + 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM + 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM + 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM + 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00f1: 0x0651, # ARABIC SHADDAH + 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM + 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM + 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> 
CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xb0' # 0x0080 -> DEGREE SIGN + u'\xb7' # 0x0081 -> MIDDLE DOT + u'\u2219' # 0x0082 -> BULLET OPERATOR + u'\u221a' # 0x0083 -> SQUARE ROOT + u'\u2592' # 0x0084 -> MEDIUM SHADE + u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL + u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL + u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL + u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT + u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL + u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT + u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT + u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT + u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT + u'\u03b2' # 0x0090 -> GREEK SMALL BETA + u'\u221e' # 0x0091 -> INFINITY + u'\u03c6' # 0x0092 -> GREEK SMALL PHI + u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN + u'\xbd' # 0x0094 -> FRACTION 1/2 + u'\xbc' # 0x0095 -> FRACTION 1/4 + u'\u2248' # 0x0096 -> ALMOST EQUAL TO + u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET + u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET + u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + u'\ufefc' # 
0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NON-BREAKING SPACE + u'\xad' # 0x00a1 -> SOFT HYPHEN + u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x00a6 -> UNDEFINED + u'\ufffe' # 0x00a7 -> UNDEFINED + u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM + u'\u060c' # 0x00ac -> ARABIC COMMA + u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM + u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO + u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE + u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO + u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE + u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR + u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE + u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX + u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN + u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT + u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE + u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\xa2' # 0x00c0 -> CENT SIGN + u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeca' # 0x00c5 -> 
ARABIC LETTER AIN FINAL FORM + u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM + u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM + u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM + u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM + u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM + u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM + u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM + u'\xa6' # 0x00db -> BROKEN VERTICAL BAR + u'\xac' # 0x00dc -> NOT SIGN + u'\xf7' # 0x00dd -> DIVISION SIGN + u'\xd7' # 0x00de -> MULTIPLICATION SIGN + u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM + u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM + u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM + u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM + u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM + u'\ufef3' # 
0x00ea -> ARABIC LETTER YEH INITIAL FORM + u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM + u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM + u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM + u'\u0651' # 0x00f1 -> ARABIC SHADDAH + u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM + u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM + u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM + u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM + u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM + u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM + u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, 
# DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # 
LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 
0x00a0, # NON-BREAKING SPACE + 0x00a2: 0x00c0, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00db, # BROKEN VERTICAL BAR + 0x00ab: 0x0097, # LEFT POINTING GUILLEMET + 0x00ac: 0x00dc, # NOT SIGN + 0x00ad: 0x00a1, # SOFT HYPHEN + 0x00b0: 0x0080, # DEGREE SIGN + 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN + 0x00b7: 0x0081, # MIDDLE DOT + 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET + 0x00bc: 0x0095, # FRACTION 1/4 + 0x00bd: 0x0094, # FRACTION 1/2 + 0x00d7: 0x00de, # MULTIPLICATION SIGN + 0x00f7: 0x00dd, # DIVISION SIGN + 0x03b2: 0x0090, # GREEK SMALL BETA + 0x03c6: 0x0092, # GREEK SMALL PHI + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0651: 0x00f1, # ARABIC SHADDAH + 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO + 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE + 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO + 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE + 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR + 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE + 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX + 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN + 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT + 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE + 0x066a: 0x0025, # ARABIC PERCENT SIGN + 0x2219: 0x0082, # BULLET OPERATOR + 0x221a: 0x0083, # SQUARE ROOT + 0x221e: 0x0091, # INFINITY + 0x2248: 0x0096, # ALMOST EQUAL TO + 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL + 0x2502: 0x0086, # FORMS LIGHT VERTICAL + 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT + 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT + 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT + 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT + 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT + 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT + 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL + 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x2592: 0x0084, # MEDIUM SHADE + 0x25a0: 0x00fe, 
# BLACK SQUARE + 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM + 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM + 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM + 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM + 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM + 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM + 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM + 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM + 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM + 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM + 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM + 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM + 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM + 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM + 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM + 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM + 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM + 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM + 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM + 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM + 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM + 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM + 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM + 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM + 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM + 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM + 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM + 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM + 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM + 
0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM + 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM + 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM + 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM + 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM + 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM + 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM + 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM + 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM + 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM + 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM + 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM + 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM + 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM + 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM + 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM + 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM + 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM + 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM + 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM + 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM + 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM + 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM + 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM + 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM + 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM + 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM + 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM + 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL 
FORM +} \ No newline at end of file Index: cp865.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp865.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp865.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp865.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL 
LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00a4, # CURRENCY SIGN - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, 
# LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E 
WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00a4, # CURRENCY SIGN + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT 
VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN 
SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF 
TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 
0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xa4' # 0x00af -> CURRENCY SIGN + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> 
BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK 
SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA 
LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 
0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL 
LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00af, # CURRENCY SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX 
DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, 
# BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp866.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp866.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp866.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp866.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE - 
0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O - 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND 
RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x0401, # CYRILLIC 
CAPITAL LETTER IO - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x2116, # NUMERO SIGN - 0x00fd: 0x00a4, # CURRENCY SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC 
CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O + 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT 
HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 
0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x2116, # NUMERO SIGN + 0x00fd: 0x00a4, # CURRENCY SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE 
+ u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x009b -> CYRILLIC 
CAPITAL LETTER YERU + u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> 
BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE 
+ u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA + u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO + u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI + u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u2116' # 0x00fc -> NUMERO SIGN + u'\xa4' # 0x00fd -> CURRENCY SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 
0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN 
CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 
0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00fd, # CURRENCY SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO + 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI + 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U + 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009e, # CYRILLIC 
CAPITAL LETTER YU + 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O + 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E + 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO + 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI + 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U + 0x2116: 0x00fc, # NUMERO SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX 
DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 
0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ No newline at end of file Index: cp869.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp869.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp869.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp869.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,645 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: None, # UNDEFINED - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: None, # UNDEFINED - 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0087: None, # UNDEFINED - 0x0088: 0x00b7, # MIDDLE DOT - 0x0089: 0x00ac, # NOT SIGN - 0x008a: 0x00a6, # BROKEN BAR - 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x008e: 0x2015, # HORIZONTAL BAR - 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x0093: None, # UNDEFINED - 0x0094: None, # UNDEFINED - 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x0097: 0x00a9, # COPYRIGHT SIGN - 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0099: 0x00b2, # SUPERSCRIPT TWO - 0x009a: 0x00b3, # SUPERSCRIPT THREE - 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA - 
0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU - 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI - 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA - 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00e6: 0x03bc, # GREEK SMALL LETTER MU - 0x00e7: 0x03bd, # GREEK SMALL LETTER NU - 0x00e8: 0x03be, # GREEK SMALL LETTER XI - 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00ea: 0x03c0, # GREEK SMALL LETTER PI - 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO - 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ef: 0x0384, # GREEK TONOS - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA - 
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE + 0x0080: None, # UNDEFINED + 0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 0x0085: None, # UNDEFINED + 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0087: None, # UNDEFINED + 0x0088: 0x00b7, # MIDDLE DOT + 0x0089: 0x00ac, # NOT SIGN + 0x008a: 0x00a6, # BROKEN BAR + 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x008e: 0x2015, # HORIZONTAL BAR + 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x0093: None, # UNDEFINED + 0x0094: None, # UNDEFINED + 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x0097: 0x00a9, # COPYRIGHT SIGN + 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0099: 0x00b2, # SUPERSCRIPT TWO + 0x009a: 0x00b3, # SUPERSCRIPT THREE + 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00a6: 0x0393, # GREEK CAPITAL 
LETTER GAMMA + 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU + 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI + 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX 
DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA + 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00e6: 0x03bc, # GREEK SMALL LETTER MU + 0x00e7: 0x03bd, # GREEK SMALL LETTER NU + 0x00e8: 0x03be, # GREEK SMALL LETTER XI + 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00ea: 0x03c0, # GREEK SMALL LETTER PI + 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO + 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ef: 0x0384, # GREEK TONOS + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03b0, 
# GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\ufffe' # 0x0080 -> UNDEFINED + u'\ufffe' # 0x0081 -> UNDEFINED + u'\ufffe' # 0x0082 -> UNDEFINED + u'\ufffe' # 0x0083 -> UNDEFINED + u'\ufffe' # 0x0084 -> UNDEFINED + u'\ufffe' # 0x0085 -> UNDEFINED + u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\ufffe' # 0x0087 -> UNDEFINED + u'\xb7' # 0x0088 -> MIDDLE DOT + u'\xac' # 0x0089 -> NOT SIGN + u'\xa6' # 0x008a -> BROKEN BAR + u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK + u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u2015' # 0x008e -> HORIZONTAL BAR + u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\ufffe' # 0x0093 -> UNDEFINED + u'\ufffe' # 0x0094 -> 
UNDEFINED + u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xa9' # 0x0097 -> COPYRIGHT SIGN + u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb2' # 0x0099 -> SUPERSCRIPT TWO + u'\xb3' # 0x009a -> SUPERSCRIPT THREE + u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\xa3' # 0x009c -> POUND SIGN + u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX 
DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON + u'\u2580' # 0x00df -> 
UPPER HALF BLOCK + u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU + u'\u0384' # 0x00ef -> GREEK TONOS + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI + u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL 
TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # 
LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER 
V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a3: 0x009c, # POUND SIGN + 0x00a6: 0x008a, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x0097, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x0089, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x0099, # SUPERSCRIPT TWO + 0x00b3: 0x009a, # SUPERSCRIPT THREE + 0x00b7: 0x0088, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x0384: 0x00ef, # GREEK TONOS + 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS + 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA + 
0x039c: 0x00b7, # GREEK CAPITAL LETTER MU + 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU + 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI + 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA + 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00e6, # GREEK SMALL LETTER MU + 0x03bd: 0x00e7, # GREEK SMALL LETTER NU + 0x03be: 0x00e8, # GREEK SMALL LETTER XI + 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00ea, # GREEK SMALL LETTER PI + 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO + 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI + 0x03c7: 
0x00f4, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI + 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0x008e, # HORIZONTAL BAR + 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +} \ 
No newline at end of file Index: cp874.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp874.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp874.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp874.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,135 +32,622 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: None, # UNDEFINED - 0x0087: None, # UNDEFINED - 0x0088: None, # UNDEFINED - 0x0089: None, # UNDEFINED - 0x008a: None, # UNDEFINED - 0x008b: None, # UNDEFINED - 0x008c: None, # UNDEFINED - 0x008d: None, # UNDEFINED - 0x008e: None, # UNDEFINED - 0x008f: None, # UNDEFINED - 0x0090: None, # UNDEFINED - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: None, # UNDEFINED - 0x0099: None, # UNDEFINED - 0x009a: None, # UNDEFINED - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: None, # UNDEFINED - 0x009e: None, # UNDEFINED - 0x009f: None, # 
UNDEFINED - 0x00a1: 0x0e01, # THAI CHARACTER KO KAI - 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI - 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT - 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI - 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON - 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG - 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU - 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN - 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING - 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG - 0x00ab: 0x0e0b, # THAI CHARACTER SO SO - 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE - 0x00ad: 0x0e0d, # THAI CHARACTER YO YING - 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA - 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK - 0x00b0: 0x0e10, # THAI CHARACTER THO THAN - 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO - 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO - 0x00b3: 0x0e13, # THAI CHARACTER NO NEN - 0x00b4: 0x0e14, # THAI CHARACTER DO DEK - 0x00b5: 0x0e15, # THAI CHARACTER TO TAO - 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG - 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN - 0x00b8: 0x0e18, # THAI CHARACTER THO THONG - 0x00b9: 0x0e19, # THAI CHARACTER NO NU - 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI - 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA - 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG - 0x00bd: 0x0e1d, # THAI CHARACTER FO FA - 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN - 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN - 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO - 0x00c1: 0x0e21, # THAI CHARACTER MO MA - 0x00c2: 0x0e22, # THAI CHARACTER YO YAK - 0x00c3: 0x0e23, # THAI CHARACTER RO RUA - 0x00c4: 0x0e24, # THAI CHARACTER RU - 0x00c5: 0x0e25, # THAI CHARACTER LO LING - 0x00c6: 0x0e26, # THAI CHARACTER LU - 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN - 0x00c8: 0x0e28, # THAI CHARACTER SO SALA - 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI - 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA - 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP - 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA - 0x00cd: 0x0e2d, # THAI CHARACTER O ANG - 0x00ce: 0x0e2e, # THAI CHARACTER HO 
NOKHUK - 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI - 0x00d0: 0x0e30, # THAI CHARACTER SARA A - 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT - 0x00d2: 0x0e32, # THAI CHARACTER SARA AA - 0x00d3: 0x0e33, # THAI CHARACTER SARA AM - 0x00d4: 0x0e34, # THAI CHARACTER SARA I - 0x00d5: 0x0e35, # THAI CHARACTER SARA II - 0x00d6: 0x0e36, # THAI CHARACTER SARA UE - 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE - 0x00d8: 0x0e38, # THAI CHARACTER SARA U - 0x00d9: 0x0e39, # THAI CHARACTER SARA UU - 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU - 0x00db: None, # UNDEFINED - 0x00dc: None, # UNDEFINED - 0x00dd: None, # UNDEFINED - 0x00de: None, # UNDEFINED - 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT - 0x00e0: 0x0e40, # THAI CHARACTER SARA E - 0x00e1: 0x0e41, # THAI CHARACTER SARA AE - 0x00e2: 0x0e42, # THAI CHARACTER SARA O - 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN - 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI - 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO - 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK - 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU - 0x00e8: 0x0e48, # THAI CHARACTER MAI EK - 0x00e9: 0x0e49, # THAI CHARACTER MAI THO - 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI - 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA - 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT - 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT - 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN - 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN - 0x00f0: 0x0e50, # THAI DIGIT ZERO - 0x00f1: 0x0e51, # THAI DIGIT ONE - 0x00f2: 0x0e52, # THAI DIGIT TWO - 0x00f3: 0x0e53, # THAI DIGIT THREE - 0x00f4: 0x0e54, # THAI DIGIT FOUR - 0x00f5: 0x0e55, # THAI DIGIT FIVE - 0x00f6: 0x0e56, # THAI DIGIT SIX - 0x00f7: 0x0e57, # THAI DIGIT SEVEN - 0x00f8: 0x0e58, # THAI DIGIT EIGHT - 0x00f9: 0x0e59, # THAI DIGIT NINE - 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU - 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT - 0x00fc: None, # UNDEFINED - 0x00fd: None, # UNDEFINED - 0x00fe: None, # UNDEFINED - 0x00ff: None, # UNDEFINED + 0x0080: 0x20ac, # EURO SIGN + 
0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: None, # UNDEFINED + 0x0087: None, # UNDEFINED + 0x0088: None, # UNDEFINED + 0x0089: None, # UNDEFINED + 0x008a: None, # UNDEFINED + 0x008b: None, # UNDEFINED + 0x008c: None, # UNDEFINED + 0x008d: None, # UNDEFINED + 0x008e: None, # UNDEFINED + 0x008f: None, # UNDEFINED + 0x0090: None, # UNDEFINED + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: None, # UNDEFINED + 0x0099: None, # UNDEFINED + 0x009a: None, # UNDEFINED + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: None, # UNDEFINED + 0x009e: None, # UNDEFINED + 0x009f: None, # UNDEFINED + 0x00a1: 0x0e01, # THAI CHARACTER KO KAI + 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI + 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT + 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI + 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON + 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG + 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU + 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN + 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING + 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG + 0x00ab: 0x0e0b, # THAI CHARACTER SO SO + 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE + 0x00ad: 0x0e0d, # THAI CHARACTER YO YING + 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA + 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK + 0x00b0: 0x0e10, # THAI CHARACTER THO THAN + 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO + 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO + 0x00b3: 0x0e13, # THAI CHARACTER NO NEN + 0x00b4: 0x0e14, # THAI CHARACTER DO DEK + 0x00b5: 0x0e15, # THAI CHARACTER TO TAO + 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG + 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN + 0x00b8: 0x0e18, # THAI 
CHARACTER THO THONG + 0x00b9: 0x0e19, # THAI CHARACTER NO NU + 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI + 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA + 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG + 0x00bd: 0x0e1d, # THAI CHARACTER FO FA + 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN + 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN + 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO + 0x00c1: 0x0e21, # THAI CHARACTER MO MA + 0x00c2: 0x0e22, # THAI CHARACTER YO YAK + 0x00c3: 0x0e23, # THAI CHARACTER RO RUA + 0x00c4: 0x0e24, # THAI CHARACTER RU + 0x00c5: 0x0e25, # THAI CHARACTER LO LING + 0x00c6: 0x0e26, # THAI CHARACTER LU + 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN + 0x00c8: 0x0e28, # THAI CHARACTER SO SALA + 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI + 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA + 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP + 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA + 0x00cd: 0x0e2d, # THAI CHARACTER O ANG + 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK + 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI + 0x00d0: 0x0e30, # THAI CHARACTER SARA A + 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT + 0x00d2: 0x0e32, # THAI CHARACTER SARA AA + 0x00d3: 0x0e33, # THAI CHARACTER SARA AM + 0x00d4: 0x0e34, # THAI CHARACTER SARA I + 0x00d5: 0x0e35, # THAI CHARACTER SARA II + 0x00d6: 0x0e36, # THAI CHARACTER SARA UE + 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE + 0x00d8: 0x0e38, # THAI CHARACTER SARA U + 0x00d9: 0x0e39, # THAI CHARACTER SARA UU + 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU + 0x00db: None, # UNDEFINED + 0x00dc: None, # UNDEFINED + 0x00dd: None, # UNDEFINED + 0x00de: None, # UNDEFINED + 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT + 0x00e0: 0x0e40, # THAI CHARACTER SARA E + 0x00e1: 0x0e41, # THAI CHARACTER SARA AE + 0x00e2: 0x0e42, # THAI CHARACTER SARA O + 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN + 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI + 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO + 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK + 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU 
+ 0x00e8: 0x0e48, # THAI CHARACTER MAI EK + 0x00e9: 0x0e49, # THAI CHARACTER MAI THO + 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI + 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA + 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT + 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT + 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN + 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN + 0x00f0: 0x0e50, # THAI DIGIT ZERO + 0x00f1: 0x0e51, # THAI DIGIT ONE + 0x00f2: 0x0e52, # THAI DIGIT TWO + 0x00f3: 0x0e53, # THAI DIGIT THREE + 0x00f4: 0x0e54, # THAI DIGIT FOUR + 0x00f5: 0x0e55, # THAI DIGIT FIVE + 0x00f6: 0x0e56, # THAI DIGIT SIX + 0x00f7: 0x0e57, # THAI DIGIT SEVEN + 0x00f8: 0x0e58, # THAI DIGIT EIGHT + 0x00f9: 0x0e59, # THAI DIGIT NINE + 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU + 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT + 0x00fc: None, # UNDEFINED + 0x00fd: None, # UNDEFINED + 0x00fe: None, # UNDEFINED + 0x00ff: None, # UNDEFINED }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE 
SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u20ac' # 0x0080 -> EURO SIGN + u'\ufffe' # 0x0081 -> UNDEFINED + u'\ufffe' # 0x0082 -> UNDEFINED + u'\ufffe' # 0x0083 -> UNDEFINED + u'\ufffe' # 0x0084 -> UNDEFINED + u'\u2026' # 0x0085 -> HORIZONTAL ELLIPSIS + u'\ufffe' # 0x0086 -> UNDEFINED + u'\ufffe' # 0x0087 -> UNDEFINED + u'\ufffe' # 0x0088 -> UNDEFINED + u'\ufffe' # 0x0089 -> UNDEFINED + u'\ufffe' # 0x008a -> UNDEFINED + u'\ufffe' # 0x008b -> UNDEFINED + u'\ufffe' # 0x008c -> UNDEFINED + u'\ufffe' # 0x008d -> UNDEFINED + u'\ufffe' # 0x008e -> UNDEFINED + u'\ufffe' # 0x008f -> UNDEFINED + u'\ufffe' # 0x0090 -> UNDEFINED + u'\u2018' # 0x0091 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x0092 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x0093 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x0094 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x0095 -> BULLET + u'\u2013' # 0x0096 -> EN DASH + u'\u2014' # 0x0097 -> EM DASH + u'\ufffe' # 0x0098 -> UNDEFINED + u'\ufffe' # 0x0099 -> UNDEFINED + u'\ufffe' # 0x009a -> UNDEFINED + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufffe' # 0x009d -> UNDEFINED + u'\ufffe' # 0x009e -> UNDEFINED + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0e01' # 0x00a1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0x00a2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0x00a3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0x00a4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0x00a5 -> THAI 
CHARACTER KHO KHON + u'\u0e06' # 0x00a6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0x00a7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0x00a8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0x00a9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0x00aa -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0x00ab -> THAI CHARACTER SO SO + u'\u0e0c' # 0x00ac -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0x00ad -> THAI CHARACTER YO YING + u'\u0e0e' # 0x00ae -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0x00af -> THAI CHARACTER TO PATAK + u'\u0e10' # 0x00b0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0x00b1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0x00b2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0x00b3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0x00b4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0x00b5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0x00b6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0x00b7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0x00b8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0x00b9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0x00ba -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0x00bb -> THAI CHARACTER PO PLA + u'\u0e1c' # 0x00bc -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0x00bd -> THAI CHARACTER FO FA + u'\u0e1e' # 0x00be -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0x00bf -> THAI CHARACTER FO FAN + u'\u0e20' # 0x00c0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0x00c1 -> THAI CHARACTER MO MA + u'\u0e22' # 0x00c2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0x00c3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0x00c4 -> THAI CHARACTER RU + u'\u0e25' # 0x00c5 -> THAI CHARACTER LO LING + u'\u0e26' # 0x00c6 -> THAI CHARACTER LU + u'\u0e27' # 0x00c7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0x00c8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0x00c9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0x00ca -> THAI CHARACTER SO SUA + u'\u0e2b' # 0x00cb -> THAI CHARACTER HO HIP + u'\u0e2c' # 0x00cc -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0x00cd -> THAI CHARACTER O ANG + u'\u0e2e' # 0x00ce -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0x00cf -> THAI CHARACTER 
PAIYANNOI + u'\u0e30' # 0x00d0 -> THAI CHARACTER SARA A + u'\u0e31' # 0x00d1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0x00d2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0x00d3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0x00d4 -> THAI CHARACTER SARA I + u'\u0e35' # 0x00d5 -> THAI CHARACTER SARA II + u'\u0e36' # 0x00d6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0x00d7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0x00d8 -> THAI CHARACTER SARA U + u'\u0e39' # 0x00d9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0x00da -> THAI CHARACTER PHINTHU + u'\ufffe' # 0x00db -> UNDEFINED + u'\ufffe' # 0x00dc -> UNDEFINED + u'\ufffe' # 0x00dd -> UNDEFINED + u'\ufffe' # 0x00de -> UNDEFINED + u'\u0e3f' # 0x00df -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0x00e0 -> THAI CHARACTER SARA E + u'\u0e41' # 0x00e1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0x00e2 -> THAI CHARACTER SARA O + u'\u0e43' # 0x00e3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0x00e4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0x00e5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0x00e6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0x00e7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0x00e8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0x00e9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0x00ea -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0x00eb -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0x00ec -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0x00ed -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0x00ee -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0x00ef -> THAI CHARACTER FONGMAN + u'\u0e50' # 0x00f0 -> THAI DIGIT ZERO + u'\u0e51' # 0x00f1 -> THAI DIGIT ONE + u'\u0e52' # 0x00f2 -> THAI DIGIT TWO + u'\u0e53' # 0x00f3 -> THAI DIGIT THREE + u'\u0e54' # 0x00f4 -> THAI DIGIT FOUR + u'\u0e55' # 0x00f5 -> THAI DIGIT FIVE + u'\u0e56' # 0x00f6 -> THAI DIGIT SIX + u'\u0e57' # 0x00f7 -> THAI DIGIT SEVEN + u'\u0e58' # 0x00f8 -> THAI DIGIT EIGHT + u'\u0e59' # 0x00f9 -> THAI DIGIT NINE + u'\u0e5a' # 0x00fa -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0x00fb -> THAI CHARACTER KHOMUT + 
u'\ufffe' # 0x00fc -> UNDEFINED + u'\ufffe' # 0x00fd -> UNDEFINED + u'\ufffe' # 0x00fe -> UNDEFINED + u'\ufffe' # 0x00ff -> UNDEFINED +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 
0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL 
LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x0e01: 0x00a1, # THAI CHARACTER KO KAI + 0x0e02: 0x00a2, # THAI CHARACTER KHO KHAI + 0x0e03: 0x00a3, # THAI CHARACTER KHO KHUAT + 0x0e04: 0x00a4, # THAI CHARACTER KHO KHWAI + 0x0e05: 0x00a5, # THAI CHARACTER KHO KHON + 0x0e06: 0x00a6, # THAI CHARACTER KHO RAKHANG + 0x0e07: 0x00a7, # THAI CHARACTER NGO NGU + 0x0e08: 0x00a8, # THAI CHARACTER CHO CHAN + 0x0e09: 0x00a9, # THAI CHARACTER CHO CHING + 0x0e0a: 0x00aa, # THAI CHARACTER CHO CHANG + 0x0e0b: 0x00ab, # THAI CHARACTER SO SO + 0x0e0c: 0x00ac, # THAI CHARACTER CHO CHOE + 0x0e0d: 0x00ad, # THAI CHARACTER YO YING + 0x0e0e: 0x00ae, # THAI CHARACTER DO CHADA + 0x0e0f: 0x00af, # THAI CHARACTER TO PATAK + 0x0e10: 0x00b0, # THAI CHARACTER THO THAN + 0x0e11: 0x00b1, # THAI CHARACTER THO NANGMONTHO + 0x0e12: 0x00b2, # THAI CHARACTER THO PHUTHAO + 0x0e13: 0x00b3, # THAI CHARACTER NO NEN + 0x0e14: 0x00b4, # THAI CHARACTER DO DEK + 0x0e15: 0x00b5, # THAI CHARACTER TO TAO + 0x0e16: 0x00b6, # THAI CHARACTER THO THUNG + 0x0e17: 0x00b7, # THAI CHARACTER THO 
THAHAN + 0x0e18: 0x00b8, # THAI CHARACTER THO THONG + 0x0e19: 0x00b9, # THAI CHARACTER NO NU + 0x0e1a: 0x00ba, # THAI CHARACTER BO BAIMAI + 0x0e1b: 0x00bb, # THAI CHARACTER PO PLA + 0x0e1c: 0x00bc, # THAI CHARACTER PHO PHUNG + 0x0e1d: 0x00bd, # THAI CHARACTER FO FA + 0x0e1e: 0x00be, # THAI CHARACTER PHO PHAN + 0x0e1f: 0x00bf, # THAI CHARACTER FO FAN + 0x0e20: 0x00c0, # THAI CHARACTER PHO SAMPHAO + 0x0e21: 0x00c1, # THAI CHARACTER MO MA + 0x0e22: 0x00c2, # THAI CHARACTER YO YAK + 0x0e23: 0x00c3, # THAI CHARACTER RO RUA + 0x0e24: 0x00c4, # THAI CHARACTER RU + 0x0e25: 0x00c5, # THAI CHARACTER LO LING + 0x0e26: 0x00c6, # THAI CHARACTER LU + 0x0e27: 0x00c7, # THAI CHARACTER WO WAEN + 0x0e28: 0x00c8, # THAI CHARACTER SO SALA + 0x0e29: 0x00c9, # THAI CHARACTER SO RUSI + 0x0e2a: 0x00ca, # THAI CHARACTER SO SUA + 0x0e2b: 0x00cb, # THAI CHARACTER HO HIP + 0x0e2c: 0x00cc, # THAI CHARACTER LO CHULA + 0x0e2d: 0x00cd, # THAI CHARACTER O ANG + 0x0e2e: 0x00ce, # THAI CHARACTER HO NOKHUK + 0x0e2f: 0x00cf, # THAI CHARACTER PAIYANNOI + 0x0e30: 0x00d0, # THAI CHARACTER SARA A + 0x0e31: 0x00d1, # THAI CHARACTER MAI HAN-AKAT + 0x0e32: 0x00d2, # THAI CHARACTER SARA AA + 0x0e33: 0x00d3, # THAI CHARACTER SARA AM + 0x0e34: 0x00d4, # THAI CHARACTER SARA I + 0x0e35: 0x00d5, # THAI CHARACTER SARA II + 0x0e36: 0x00d6, # THAI CHARACTER SARA UE + 0x0e37: 0x00d7, # THAI CHARACTER SARA UEE + 0x0e38: 0x00d8, # THAI CHARACTER SARA U + 0x0e39: 0x00d9, # THAI CHARACTER SARA UU + 0x0e3a: 0x00da, # THAI CHARACTER PHINTHU + 0x0e3f: 0x00df, # THAI CURRENCY SYMBOL BAHT + 0x0e40: 0x00e0, # THAI CHARACTER SARA E + 0x0e41: 0x00e1, # THAI CHARACTER SARA AE + 0x0e42: 0x00e2, # THAI CHARACTER SARA O + 0x0e43: 0x00e3, # THAI CHARACTER SARA AI MAIMUAN + 0x0e44: 0x00e4, # THAI CHARACTER SARA AI MAIMALAI + 0x0e45: 0x00e5, # THAI CHARACTER LAKKHANGYAO + 0x0e46: 0x00e6, # THAI CHARACTER MAIYAMOK + 0x0e47: 0x00e7, # THAI CHARACTER MAITAIKHU + 0x0e48: 0x00e8, # THAI CHARACTER MAI EK + 0x0e49: 0x00e9, # THAI CHARACTER MAI 
THO + 0x0e4a: 0x00ea, # THAI CHARACTER MAI TRI + 0x0e4b: 0x00eb, # THAI CHARACTER MAI CHATTAWA + 0x0e4c: 0x00ec, # THAI CHARACTER THANTHAKHAT + 0x0e4d: 0x00ed, # THAI CHARACTER NIKHAHIT + 0x0e4e: 0x00ee, # THAI CHARACTER YAMAKKAN + 0x0e4f: 0x00ef, # THAI CHARACTER FONGMAN + 0x0e50: 0x00f0, # THAI DIGIT ZERO + 0x0e51: 0x00f1, # THAI DIGIT ONE + 0x0e52: 0x00f2, # THAI DIGIT TWO + 0x0e53: 0x00f3, # THAI DIGIT THREE + 0x0e54: 0x00f4, # THAI DIGIT FOUR + 0x0e55: 0x00f5, # THAI DIGIT FIVE + 0x0e56: 0x00f6, # THAI DIGIT SIX + 0x0e57: 0x00f7, # THAI DIGIT SEVEN + 0x0e58: 0x00f8, # THAI DIGIT EIGHT + 0x0e59: 0x00f9, # THAI DIGIT NINE + 0x0e5a: 0x00fa, # THAI CHARACTER ANGKHANKHU + 0x0e5b: 0x00fb, # THAI CHARACTER KHOMUT + 0x2013: 0x0096, # EN DASH + 0x2014: 0x0097, # EM DASH + 0x2018: 0x0091, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x0092, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x0093, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x0094, # RIGHT DOUBLE QUOTATION MARK + 0x2022: 0x0095, # BULLET + 0x2026: 0x0085, # HORIZONTAL ELLIPSIS + 0x20ac: 0x0080, # EURO SIGN +} \ No newline at end of file Index: iso8859_1.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_1.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_1.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_1.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-1.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -39,6 +34,524 @@ decoding_map.update({ }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN 
SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN 
CAPITAL LETTER ETH (Icelandic) + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S (German) + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' 
# 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN (Icelandic) + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # 
DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN 
CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 
0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN 
CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S (German) + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00f1: 
0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS +} \ No newline at end of file Index: iso8859_10.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_10.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_10.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_10.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-10.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,54 +32,572 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE - 0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE - 0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00ae: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00af: 0x014a, # LATIN CAPITAL LETTER ENG - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE - 0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00bb: 0x0167, # LATIN SMALL LETTER T WITH STROKE - 0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bd: 0x2015, # HORIZONTAL BAR - 0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00bf: 0x014b, # LATIN SMALL LETTER ENG - 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x0116, # LATIN 
CAPITAL LETTER E WITH DOT ABOVE - 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE - 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE - 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00ff: 0x0138, # LATIN SMALL LETTER KRA + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE + 0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE + 0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00ae: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00af: 0x014a, # LATIN CAPITAL LETTER ENG + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE + 0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00bb: 0x0167, # LATIN SMALL LETTER T 
WITH STROKE + 0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bd: 0x2015, # HORIZONTAL BAR + 0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00bf: 0x014b, # LATIN SMALL LETTER ENG + 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE + 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE + 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00ff: 0x0138, # LATIN SMALL LETTER KRA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE 
CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0112' # 0x00a2 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0x00a3 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u012a' # 0x00a4 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0128' # 0x00a5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u0136' # 0x00a6 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u013b' # 0x00a8 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0110' # 0x00a9 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0160' # 0x00aa -> LATIN CAPITAL LETTER S WITH CARON + u'\u0166' # 0x00ab -> LATIN CAPITAL LETTER T WITH STROKE + u'\u017d' # 0x00ac -> LATIN CAPITAL LETTER Z WITH CARON + 
u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u016a' # 0x00ae -> LATIN CAPITAL LETTER U WITH MACRON + u'\u014a' # 0x00af -> LATIN CAPITAL LETTER ENG + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\u0105' # 0x00b1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0113' # 0x00b2 -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0x00b3 -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012b' # 0x00b4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0129' # 0x00b5 -> LATIN SMALL LETTER I WITH TILDE + u'\u0137' # 0x00b6 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u013c' # 0x00b8 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0111' # 0x00b9 -> LATIN SMALL LETTER D WITH STROKE + u'\u0161' # 0x00ba -> LATIN SMALL LETTER S WITH CARON + u'\u0167' # 0x00bb -> LATIN SMALL LETTER T WITH STROKE + u'\u017e' # 0x00bc -> LATIN SMALL LETTER Z WITH CARON + u'\u2015' # 0x00bd -> HORIZONTAL BAR + u'\u016b' # 0x00be -> LATIN SMALL LETTER U WITH MACRON + u'\u014b' # 0x00bf -> LATIN SMALL LETTER ENG + u'\u0100' # 0x00c0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0x00c7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0x00cc -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\u0145' # 0x00d1 -> LATIN 
CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0x00d2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u0168' # 0x00d7 -> LATIN CAPITAL LETTER U WITH TILDE + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0x00d9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S (German) + u'\u0101' # 0x00e0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\u012f' # 0x00e7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0x00ec -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\u0146' # 0x00f1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0x00f2 -> LATIN SMALL LETTER O WITH MACRON + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN 
SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0169' # 0x00f7 -> LATIN SMALL LETTER U WITH TILDE + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0x00f9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN (Icelandic) + u'\u0138' # 0x00ff -> LATIN SMALL LETTER KRA +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR 
SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL 
LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 
0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a7: 0x00a7, # SECTION SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S (German) + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN 
SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) + 0x0100: 0x00c0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e0, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00a1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b1, # LATIN SMALL LETTER A WITH OGONEK + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0x00a9, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00b9, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0x00a2, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00b2, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cc, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00ec, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00ca, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00a3, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00b3, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0x00a5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0x00b5, # LATIN SMALL LETTER I WITH TILDE + 0x012a: 0x00a4, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00b4, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00a6, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 
0x00b6, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 0x00ff, # LATIN SMALL LETTER KRA + 0x013b: 0x00a8, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00b8, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0x00d1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00f1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014a: 0x00af, # LATIN CAPITAL LETTER ENG + 0x014b: 0x00bf, # LATIN SMALL LETTER ENG + 0x014c: 0x00d2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f2, # LATIN SMALL LETTER O WITH MACRON + 0x0160: 0x00aa, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00ba, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0x00ab, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0x00bb, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0x00d7, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0x00f7, # LATIN SMALL LETTER U WITH TILDE + 0x016a: 0x00ae, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00be, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f9, # LATIN SMALL LETTER U WITH OGONEK + 0x017d: 0x00ac, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00bc, # LATIN SMALL LETTER Z WITH CARON + 0x2015: 0x00bd, # HORIZONTAL BAR +} \ No newline at end of file Index: iso8859_11.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_11.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- iso8859_11.py 7 Aug 2004 06:03:08 -0000 1.2 +++ iso8859_11.py 21 Oct 2005 13:49:12 -0000 1.3 @@ -1,7 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-11.TXT' with gencodec.py. - - Generated from mapping found in - ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT +""" Python Character Mapping Codec generated from 'ISO8859/8859-11.TXT' with gencodec.py. 
"""#" @@ -17,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -35,103 +32,613 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0e01, # THAI CHARACTER KO KAI - 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI - 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT - 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI - 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON - 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG - 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU - 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN - 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING - 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG - 0x00ab: 0x0e0b, # THAI CHARACTER SO SO - 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE - 0x00ad: 0x0e0d, # THAI CHARACTER YO YING - 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA - 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK - 0x00b0: 0x0e10, # THAI CHARACTER THO THAN - 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO - 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO - 0x00b3: 0x0e13, # THAI CHARACTER NO NEN - 0x00b4: 0x0e14, # THAI CHARACTER DO DEK - 0x00b5: 0x0e15, # THAI CHARACTER TO TAO - 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG - 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN - 0x00b8: 0x0e18, # THAI CHARACTER THO THONG - 0x00b9: 0x0e19, # THAI CHARACTER NO NU - 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI - 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA - 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG - 0x00bd: 0x0e1d, # THAI CHARACTER FO FA - 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN - 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN - 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO - 0x00c1: 0x0e21, # THAI CHARACTER MO MA - 0x00c2: 0x0e22, # THAI CHARACTER YO YAK - 0x00c3: 0x0e23, # THAI CHARACTER RO RUA - 0x00c4: 0x0e24, # THAI CHARACTER RU - 0x00c5: 0x0e25, # THAI CHARACTER LO LING - 0x00c6: 0x0e26, # THAI CHARACTER LU - 0x00c7: 
0x0e27, # THAI CHARACTER WO WAEN - 0x00c8: 0x0e28, # THAI CHARACTER SO SALA - 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI - 0x00ca: 0x0e2a, # THAI CHARACTER SO SUA - 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP - 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA - 0x00cd: 0x0e2d, # THAI CHARACTER O ANG - 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK - 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI - 0x00d0: 0x0e30, # THAI CHARACTER SARA A - 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT - 0x00d2: 0x0e32, # THAI CHARACTER SARA AA - 0x00d3: 0x0e33, # THAI CHARACTER SARA AM - 0x00d4: 0x0e34, # THAI CHARACTER SARA I - 0x00d5: 0x0e35, # THAI CHARACTER SARA II - 0x00d6: 0x0e36, # THAI CHARACTER SARA UE - 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE - 0x00d8: 0x0e38, # THAI CHARACTER SARA U - 0x00d9: 0x0e39, # THAI CHARACTER SARA UU - 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT - 0x00e0: 0x0e40, # THAI CHARACTER SARA E - 0x00e1: 0x0e41, # THAI CHARACTER SARA AE - 0x00e2: 0x0e42, # THAI CHARACTER SARA O - 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN - 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI - 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO - 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK - 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU - 0x00e8: 0x0e48, # THAI CHARACTER MAI EK - 0x00e9: 0x0e49, # THAI CHARACTER MAI THO - 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI - 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA - 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT - 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT - 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN - 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN - 0x00f0: 0x0e50, # THAI DIGIT ZERO - 0x00f1: 0x0e51, # THAI DIGIT ONE - 0x00f2: 0x0e52, # THAI DIGIT TWO - 0x00f3: 0x0e53, # THAI DIGIT THREE - 0x00f4: 0x0e54, # THAI DIGIT FOUR - 0x00f5: 0x0e55, # THAI DIGIT FIVE - 0x00f6: 0x0e56, # THAI DIGIT SIX - 0x00f7: 0x0e57, # THAI DIGIT SEVEN - 0x00f8: 0x0e58, # THAI 
DIGIT EIGHT - 0x00f9: 0x0e59, # THAI DIGIT NINE - 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU - 0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT - 0x00fc: None, - 0x00fd: None, - 0x00fe: None, - 0x00ff: None, + 0x00a1: 0x0e01, # THAI CHARACTER KO KAI + 0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI + 0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT + 0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI + 0x00a5: 0x0e05, # THAI CHARACTER KHO KHON + 0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG + 0x00a7: 0x0e07, # THAI CHARACTER NGO NGU + 0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN + 0x00a9: 0x0e09, # THAI CHARACTER CHO CHING + 0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG + 0x00ab: 0x0e0b, # THAI CHARACTER SO SO + 0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE + 0x00ad: 0x0e0d, # THAI CHARACTER YO YING + 0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA + 0x00af: 0x0e0f, # THAI CHARACTER TO PATAK + 0x00b0: 0x0e10, # THAI CHARACTER THO THAN + 0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO + 0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO + 0x00b3: 0x0e13, # THAI CHARACTER NO NEN + 0x00b4: 0x0e14, # THAI CHARACTER DO DEK + 0x00b5: 0x0e15, # THAI CHARACTER TO TAO + 0x00b6: 0x0e16, # THAI CHARACTER THO THUNG + 0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN + 0x00b8: 0x0e18, # THAI CHARACTER THO THONG + 0x00b9: 0x0e19, # THAI CHARACTER NO NU + 0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI + 0x00bb: 0x0e1b, # THAI CHARACTER PO PLA + 0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG + 0x00bd: 0x0e1d, # THAI CHARACTER FO FA + 0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN + 0x00bf: 0x0e1f, # THAI CHARACTER FO FAN + 0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO + 0x00c1: 0x0e21, # THAI CHARACTER MO MA + 0x00c2: 0x0e22, # THAI CHARACTER YO YAK + 0x00c3: 0x0e23, # THAI CHARACTER RO RUA + 0x00c4: 0x0e24, # THAI CHARACTER RU + 0x00c5: 0x0e25, # THAI CHARACTER LO LING + 0x00c6: 0x0e26, # THAI CHARACTER LU + 0x00c7: 0x0e27, # THAI CHARACTER WO WAEN + 0x00c8: 0x0e28, # THAI CHARACTER SO SALA + 0x00c9: 0x0e29, # THAI CHARACTER SO RUSI + 0x00ca: 
0x0e2a, # THAI CHARACTER SO SUA + 0x00cb: 0x0e2b, # THAI CHARACTER HO HIP + 0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA + 0x00cd: 0x0e2d, # THAI CHARACTER O ANG + 0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK + 0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI + 0x00d0: 0x0e30, # THAI CHARACTER SARA A + 0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT + 0x00d2: 0x0e32, # THAI CHARACTER SARA AA + 0x00d3: 0x0e33, # THAI CHARACTER SARA AM + 0x00d4: 0x0e34, # THAI CHARACTER SARA I + 0x00d5: 0x0e35, # THAI CHARACTER SARA II + 0x00d6: 0x0e36, # THAI CHARACTER SARA UE + 0x00d7: 0x0e37, # THAI CHARACTER SARA UEE + 0x00d8: 0x0e38, # THAI CHARACTER SARA U + 0x00d9: 0x0e39, # THAI CHARACTER SARA UU + 0x00da: 0x0e3a, # THAI CHARACTER PHINTHU + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT + 0x00e0: 0x0e40, # THAI CHARACTER SARA E + 0x00e1: 0x0e41, # THAI CHARACTER SARA AE + 0x00e2: 0x0e42, # THAI CHARACTER SARA O + 0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN + 0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI + 0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO + 0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK + 0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU + 0x00e8: 0x0e48, # THAI CHARACTER MAI EK + 0x00e9: 0x0e49, # THAI CHARACTER MAI THO + 0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI + 0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA + 0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT + 0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT + 0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN + 0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN + 0x00f0: 0x0e50, # THAI DIGIT ZERO + 0x00f1: 0x0e51, # THAI DIGIT ONE + 0x00f2: 0x0e52, # THAI DIGIT TWO + 0x00f3: 0x0e53, # THAI DIGIT THREE + 0x00f4: 0x0e54, # THAI DIGIT FOUR + 0x00f5: 0x0e55, # THAI DIGIT FIVE + 0x00f6: 0x0e56, # THAI DIGIT SIX + 0x00f7: 0x0e57, # THAI DIGIT SEVEN + 0x00f8: 0x0e58, # THAI DIGIT EIGHT + 0x00f9: 0x0e59, # THAI DIGIT NINE + 0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU + 0x00fb: 0x0e5b, # THAI CHARACTER 
KHOMUT + 0x00fc: None, + 0x00fd: None, + 0x00fe: None, + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0e01' # 0x00a1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0x00a2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0x00a3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0x00a4 -> THAI 
CHARACTER KHO KHWAI + u'\u0e05' # 0x00a5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0x00a6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0x00a7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0x00a8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0x00a9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0x00aa -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0x00ab -> THAI CHARACTER SO SO + u'\u0e0c' # 0x00ac -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0x00ad -> THAI CHARACTER YO YING + u'\u0e0e' # 0x00ae -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0x00af -> THAI CHARACTER TO PATAK + u'\u0e10' # 0x00b0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0x00b1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0x00b2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0x00b3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0x00b4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0x00b5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0x00b6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0x00b7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0x00b8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0x00b9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0x00ba -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0x00bb -> THAI CHARACTER PO PLA + u'\u0e1c' # 0x00bc -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0x00bd -> THAI CHARACTER FO FA + u'\u0e1e' # 0x00be -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0x00bf -> THAI CHARACTER FO FAN + u'\u0e20' # 0x00c0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0x00c1 -> THAI CHARACTER MO MA + u'\u0e22' # 0x00c2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0x00c3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0x00c4 -> THAI CHARACTER RU + u'\u0e25' # 0x00c5 -> THAI CHARACTER LO LING + u'\u0e26' # 0x00c6 -> THAI CHARACTER LU + u'\u0e27' # 0x00c7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0x00c8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0x00c9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0x00ca -> THAI CHARACTER SO SUA + u'\u0e2b' # 0x00cb -> THAI CHARACTER HO HIP + u'\u0e2c' # 0x00cc -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0x00cd -> THAI CHARACTER O ANG + u'\u0e2e' # 0x00ce -> THAI CHARACTER 
HO NOKHUK + u'\u0e2f' # 0x00cf -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0x00d0 -> THAI CHARACTER SARA A + u'\u0e31' # 0x00d1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0x00d2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0x00d3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0x00d4 -> THAI CHARACTER SARA I + u'\u0e35' # 0x00d5 -> THAI CHARACTER SARA II + u'\u0e36' # 0x00d6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0x00d7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0x00d8 -> THAI CHARACTER SARA U + u'\u0e39' # 0x00d9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0x00da -> THAI CHARACTER PHINTHU + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u0e3f' # 0x00df -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0x00e0 -> THAI CHARACTER SARA E + u'\u0e41' # 0x00e1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0x00e2 -> THAI CHARACTER SARA O + u'\u0e43' # 0x00e3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0x00e4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0x00e5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0x00e6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0x00e7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0x00e8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0x00e9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0x00ea -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0x00eb -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0x00ec -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0x00ed -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0x00ee -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0x00ef -> THAI CHARACTER FONGMAN + u'\u0e50' # 0x00f0 -> THAI DIGIT ZERO + u'\u0e51' # 0x00f1 -> THAI DIGIT ONE + u'\u0e52' # 0x00f2 -> THAI DIGIT TWO + u'\u0e53' # 0x00f3 -> THAI DIGIT THREE + u'\u0e54' # 0x00f4 -> THAI DIGIT FOUR + u'\u0e55' # 0x00f5 -> THAI DIGIT FIVE + u'\u0e56' # 0x00f6 -> THAI DIGIT SIX + u'\u0e57' # 0x00f7 -> THAI DIGIT SEVEN + u'\u0e58' # 0x00f8 -> THAI DIGIT EIGHT + u'\u0e59' # 0x00f9 -> THAI DIGIT NINE + u'\u0e5a' # 0x00fa -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0x00fb -> THAI CHARACTER KHOMUT + u'\ufffe' + u'\ufffe' + u'\ufffe' + 
u'\ufffe' +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 
0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL 
LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x0e01: 0x00a1, # THAI CHARACTER KO KAI + 0x0e02: 0x00a2, # THAI CHARACTER KHO KHAI + 0x0e03: 0x00a3, # THAI CHARACTER KHO KHUAT + 0x0e04: 0x00a4, # THAI CHARACTER KHO KHWAI + 0x0e05: 0x00a5, # THAI CHARACTER KHO KHON + 0x0e06: 0x00a6, # THAI CHARACTER KHO RAKHANG + 0x0e07: 0x00a7, # THAI CHARACTER NGO NGU + 0x0e08: 0x00a8, # THAI CHARACTER CHO CHAN + 0x0e09: 0x00a9, # THAI CHARACTER CHO CHING + 0x0e0a: 0x00aa, # THAI CHARACTER CHO CHANG + 0x0e0b: 0x00ab, # THAI CHARACTER SO SO + 
0x0e0c: 0x00ac, # THAI CHARACTER CHO CHOE + 0x0e0d: 0x00ad, # THAI CHARACTER YO YING + 0x0e0e: 0x00ae, # THAI CHARACTER DO CHADA + 0x0e0f: 0x00af, # THAI CHARACTER TO PATAK + 0x0e10: 0x00b0, # THAI CHARACTER THO THAN + 0x0e11: 0x00b1, # THAI CHARACTER THO NANGMONTHO + 0x0e12: 0x00b2, # THAI CHARACTER THO PHUTHAO + 0x0e13: 0x00b3, # THAI CHARACTER NO NEN + 0x0e14: 0x00b4, # THAI CHARACTER DO DEK + 0x0e15: 0x00b5, # THAI CHARACTER TO TAO + 0x0e16: 0x00b6, # THAI CHARACTER THO THUNG + 0x0e17: 0x00b7, # THAI CHARACTER THO THAHAN + 0x0e18: 0x00b8, # THAI CHARACTER THO THONG + 0x0e19: 0x00b9, # THAI CHARACTER NO NU + 0x0e1a: 0x00ba, # THAI CHARACTER BO BAIMAI + 0x0e1b: 0x00bb, # THAI CHARACTER PO PLA + 0x0e1c: 0x00bc, # THAI CHARACTER PHO PHUNG + 0x0e1d: 0x00bd, # THAI CHARACTER FO FA + 0x0e1e: 0x00be, # THAI CHARACTER PHO PHAN + 0x0e1f: 0x00bf, # THAI CHARACTER FO FAN + 0x0e20: 0x00c0, # THAI CHARACTER PHO SAMPHAO + 0x0e21: 0x00c1, # THAI CHARACTER MO MA + 0x0e22: 0x00c2, # THAI CHARACTER YO YAK + 0x0e23: 0x00c3, # THAI CHARACTER RO RUA + 0x0e24: 0x00c4, # THAI CHARACTER RU + 0x0e25: 0x00c5, # THAI CHARACTER LO LING + 0x0e26: 0x00c6, # THAI CHARACTER LU + 0x0e27: 0x00c7, # THAI CHARACTER WO WAEN + 0x0e28: 0x00c8, # THAI CHARACTER SO SALA + 0x0e29: 0x00c9, # THAI CHARACTER SO RUSI + 0x0e2a: 0x00ca, # THAI CHARACTER SO SUA + 0x0e2b: 0x00cb, # THAI CHARACTER HO HIP + 0x0e2c: 0x00cc, # THAI CHARACTER LO CHULA + 0x0e2d: 0x00cd, # THAI CHARACTER O ANG + 0x0e2e: 0x00ce, # THAI CHARACTER HO NOKHUK + 0x0e2f: 0x00cf, # THAI CHARACTER PAIYANNOI + 0x0e30: 0x00d0, # THAI CHARACTER SARA A + 0x0e31: 0x00d1, # THAI CHARACTER MAI HAN-AKAT + 0x0e32: 0x00d2, # THAI CHARACTER SARA AA + 0x0e33: 0x00d3, # THAI CHARACTER SARA AM + 0x0e34: 0x00d4, # THAI CHARACTER SARA I + 0x0e35: 0x00d5, # THAI CHARACTER SARA II + 0x0e36: 0x00d6, # THAI CHARACTER SARA UE + 0x0e37: 0x00d7, # THAI CHARACTER SARA UEE + 0x0e38: 0x00d8, # THAI CHARACTER SARA U + 0x0e39: 0x00d9, # THAI CHARACTER SARA UU + 0x0e3a: 
0x00da, # THAI CHARACTER PHINTHU + 0x0e3f: 0x00df, # THAI CURRENCY SYMBOL BAHT + 0x0e40: 0x00e0, # THAI CHARACTER SARA E + 0x0e41: 0x00e1, # THAI CHARACTER SARA AE + 0x0e42: 0x00e2, # THAI CHARACTER SARA O + 0x0e43: 0x00e3, # THAI CHARACTER SARA AI MAIMUAN + 0x0e44: 0x00e4, # THAI CHARACTER SARA AI MAIMALAI + 0x0e45: 0x00e5, # THAI CHARACTER LAKKHANGYAO + 0x0e46: 0x00e6, # THAI CHARACTER MAIYAMOK + 0x0e47: 0x00e7, # THAI CHARACTER MAITAIKHU + 0x0e48: 0x00e8, # THAI CHARACTER MAI EK + 0x0e49: 0x00e9, # THAI CHARACTER MAI THO + 0x0e4a: 0x00ea, # THAI CHARACTER MAI TRI + 0x0e4b: 0x00eb, # THAI CHARACTER MAI CHATTAWA + 0x0e4c: 0x00ec, # THAI CHARACTER THANTHAKHAT + 0x0e4d: 0x00ed, # THAI CHARACTER NIKHAHIT + 0x0e4e: 0x00ee, # THAI CHARACTER YAMAKKAN + 0x0e4f: 0x00ef, # THAI CHARACTER FONGMAN + 0x0e50: 0x00f0, # THAI DIGIT ZERO + 0x0e51: 0x00f1, # THAI DIGIT ONE + 0x0e52: 0x00f2, # THAI DIGIT TWO + 0x0e53: 0x00f3, # THAI DIGIT THREE + 0x0e54: 0x00f4, # THAI DIGIT FOUR + 0x0e55: 0x00f5, # THAI DIGIT FIVE + 0x0e56: 0x00f6, # THAI DIGIT SIX + 0x0e57: 0x00f7, # THAI DIGIT SEVEN + 0x0e58: 0x00f8, # THAI DIGIT EIGHT + 0x0e59: 0x00f9, # THAI DIGIT NINE + 0x0e5a: 0x00fa, # THAI CHARACTER ANGKHANKHU + 0x0e5b: 0x00fb, # THAI CHARACTER KHOMUT +} \ No newline at end of file Index: iso8859_13.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_13.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_13.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_13.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-13.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,64 +32,582 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00bf: 0x00e6, # LATIN SMALL LETTER AE - 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 
0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00bf: 0x00e6, # LATIN SMALL LETTER AE + 0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c6: 
0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 
0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u201d' # 0x00a1 -> RIGHT DOUBLE QUOTATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u201e' # 0x00a5 -> 
DOUBLE LOW-9 QUOTATION MARK + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xd8' # 0x00a8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u0156' # 0x00aa -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xc6' # 0x00af -> LATIN CAPITAL LETTER AE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u201c' # 0x00b4 -> LEFT DOUBLE QUOTATION MARK + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xf8' # 0x00b8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\u0157' # 0x00ba -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0x00bf -> LATIN SMALL LETTER AE + u'\u0104' # 0x00c0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0x00c1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0x00c2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0x00c3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0x00c6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0x00c7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0x00ca -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0x00cb -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0x00cc -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0x00cd -> LATIN CAPITAL LETTER K WITH 
CEDILLA + u'\u012a' # 0x00ce -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0x00cf -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0x00d0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0x00d4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0172' # 0x00d8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0x00d9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0x00da -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0x00db -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0x00dd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0x00de -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S (German) + u'\u0105' # 0x00e0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0x00e1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0x00e2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0x00e3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0x00e6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0x00e7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0x00ea -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0x00eb -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0x00ec -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0x00ed -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0x00ee -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0x00ef -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0x00f0 -> LATIN SMALL 
LETTER S WITH CARON + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0x00f2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0x00f4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0173' # 0x00f8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0x00f9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0x00fa -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0x00fb -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0x00fd -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0x00fe -> LATIN SMALL LETTER Z WITH CARON + u'\u2019' # 0x00ff -> RIGHT SINGLE QUOTATION MARK +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD 
SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 
0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, 
# + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00af, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00a8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S (German) + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00bf, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00f3, # LATIN SMALL 
LETTER O WITH ACUTE + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00b8, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00c2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00c0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00e0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e3, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00c7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00e7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cb, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00eb, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00c6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00e6, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00cc, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00ec, # LATIN SMALL LETTER G WITH CEDILLA + 0x012a: 0x00ce, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00ee, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c1, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00cd, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00ed, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00cf, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00ef, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00d9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00f9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00d2, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00f2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00d4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f4, # LATIN SMALL LETTER O WITH 
MACRON + 0x0156: 0x00aa, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x00ba, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x00da, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00fa, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00d0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00f0, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00db, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00fb, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x00ca, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ea, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00dd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00fd, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00de, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00fe, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0x00ff, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00b4, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00a1, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00a5, # DOUBLE LOW-9 QUOTATION MARK +} \ No newline at end of file Index: iso8859_14.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_14.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_14.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_14.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-14.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,39 +32,557 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE - 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE - 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE - 0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE - 0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE - 0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE - 0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE - 0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE - 0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE - 0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE - 0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE - 0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE - 0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE - 0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE - 0x00ba: 0x1e83, # LATIN SMALL LETTER W WITH ACUTE - 0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE - 0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE - 0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS - 0x00be: 0x1e85, # LATIN SMALL LETTER W WITH DIAERESIS - 0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE - 0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX - 0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE - 0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX - 0x00f7: 0x1e6b, # LATIN 
SMALL LETTER T WITH DOT ABOVE - 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX + 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE + 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE + 0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE + 0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE + 0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE + 0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x00ba: 0x1e83, # LATIN SMALL LETTER W WITH ACUTE + 0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE + 0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x00be: 0x1e85, # LATIN SMALL LETTER W WITH DIAERESIS + 0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX + 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 
0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u1e02' # 0x00a1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE + u'\u1e03' # 0x00a2 -> LATIN SMALL LETTER B WITH DOT ABOVE + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u010a' # 0x00a4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u010b' # 0x00a5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u1e0a' # 0x00a6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u1e80' # 0x00a8 -> LATIN CAPITAL LETTER W WITH GRAVE + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u1e82' # 0x00aa -> LATIN CAPITAL LETTER W WITH ACUTE + u'\u1e0b' # 0x00ab -> LATIN SMALL LETTER D WITH DOT ABOVE + u'\u1ef2' # 0x00ac -> LATIN CAPITAL LETTER Y WITH GRAVE + u'\xad' # 0x00ad -> SOFT HYPHEN + 
u'\xae' # 0x00ae -> REGISTERED SIGN + u'\u0178' # 0x00af -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u1e1e' # 0x00b0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE + u'\u1e1f' # 0x00b1 -> LATIN SMALL LETTER F WITH DOT ABOVE + u'\u0120' # 0x00b2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\u0121' # 0x00b3 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\u1e40' # 0x00b4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE + u'\u1e41' # 0x00b5 -> LATIN SMALL LETTER M WITH DOT ABOVE + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\u1e56' # 0x00b7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE + u'\u1e81' # 0x00b8 -> LATIN SMALL LETTER W WITH GRAVE + u'\u1e57' # 0x00b9 -> LATIN SMALL LETTER P WITH DOT ABOVE + u'\u1e83' # 0x00ba -> LATIN SMALL LETTER W WITH ACUTE + u'\u1e60' # 0x00bb -> LATIN CAPITAL LETTER S WITH DOT ABOVE + u'\u1ef3' # 0x00bc -> LATIN SMALL LETTER Y WITH GRAVE + u'\u1e84' # 0x00bd -> LATIN CAPITAL LETTER W WITH DIAERESIS + u'\u1e85' # 0x00be -> LATIN SMALL LETTER W WITH DIAERESIS + u'\u1e61' # 0x00bf -> LATIN SMALL LETTER S WITH DOT ABOVE + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0174' # 0x00d0 -> LATIN CAPITAL LETTER W 
WITH CIRCUMFLEX + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u1e6a' # 0x00d7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0176' # 0x00de -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0175' # 0x00f0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + 
u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u1e6b' # 0x00f7 -> LATIN SMALL LETTER T WITH DOT ABOVE + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0177' # 0x00fe -> LATIN SMALL LETTER Y WITH CIRCUMFLEX + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # 
NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL 
LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, 
# + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # 
LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x010a: 0x00a4, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010b: 0x00a5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x0120: 0x00b2, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0x00b3, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0174: 0x00d0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x0175: 0x00f0, # LATIN SMALL LETTER W WITH CIRCUMFLEX + 0x0176: 0x00de, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x0177: 0x00fe, # LATIN SMALL LETTER Y WITH CIRCUMFLEX + 
0x0178: 0x00af, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x1e02: 0x00a1, # LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x1e03: 0x00a2, # LATIN SMALL LETTER B WITH DOT ABOVE + 0x1e0a: 0x00a6, # LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x1e0b: 0x00ab, # LATIN SMALL LETTER D WITH DOT ABOVE + 0x1e1e: 0x00b0, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x1e1f: 0x00b1, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x1e40: 0x00b4, # LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x1e41: 0x00b5, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x1e56: 0x00b7, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x1e57: 0x00b9, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x1e60: 0x00bb, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x1e61: 0x00bf, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x1e6a: 0x00d7, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x1e6b: 0x00f7, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x1e80: 0x00a8, # LATIN CAPITAL LETTER W WITH GRAVE + 0x1e81: 0x00b8, # LATIN SMALL LETTER W WITH GRAVE + 0x1e82: 0x00aa, # LATIN CAPITAL LETTER W WITH ACUTE + 0x1e83: 0x00ba, # LATIN SMALL LETTER W WITH ACUTE + 0x1e84: 0x00bd, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x1e85: 0x00be, # LATIN SMALL LETTER W WITH DIAERESIS + 0x1ef2: 0x00ac, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x1ef3: 0x00bc, # LATIN SMALL LETTER Y WITH GRAVE +} \ No newline at end of file Index: iso8859_15.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_15.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_15.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_15.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-15.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,16 +32,534 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a4: 0x20ac, # EURO SIGN - 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE - 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00a4: 0x20ac, # EURO SIGN + 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE + 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + 
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u20ac' # 0x00a4 -> EURO SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\u0160' # 0x00a6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u0161' # 0x00a8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + 
u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u017d' # 0x00b4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u017e' # 0x00b8 -> LATIN SMALL LETTER Z WITH CARON + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0x00bc -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00bd -> LATIN SMALL LIGATURE OE + u'\u0178' # 0x00be -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0x00d0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION 
SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0x00f0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> 
LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x00fe -> LATIN SMALL LETTER THORN + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 
0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # 
LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 
0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL 
LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00fe, # LATIN SMALL LETTER THORN + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0x00bc, # LATIN CAPITAL 
LIGATURE OE + 0x0153: 0x00bd, # LATIN SMALL LIGATURE OE + 0x0160: 0x00a6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00a8, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x00be, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x017d: 0x00b4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00b8, # LATIN SMALL LETTER Z WITH CARON + 0x20ac: 0x00a4, # EURO SIGN +} \ No newline at end of file Index: iso8859_16.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_16.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- iso8859_16.py 7 Aug 2004 06:03:09 -0000 1.2 +++ iso8859_16.py 21 Oct 2005 13:49:12 -0000 1.3 @@ -1,7 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-16.TXT' with gencodec.py. - - Generated from mapping found in - ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT +""" Python Character Mapping Codec generated from 'ISO8859/8859-16.TXT' with gencodec.py. 
"""#" @@ -17,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -35,48 +32,566 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a4: 0x20ac, # EURO SIGN - 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00aa: 0x0218, # LATIN CAPITAL LETTER S WITH COMMA BELOW - 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00ae: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b2: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b5: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00b9: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ba: 0x0219, # LATIN SMALL LETTER S WITH COMMA BELOW - 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE - 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d7: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00d8: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00dd: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00de: 0x021a, # LATIN CAPITAL LETTER T WITH COMMA BELOW - 0x00e3: 
0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f7: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00f8: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fd: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00fe: 0x021b, # LATIN SMALL LETTER T WITH COMMA BELOW + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00a4: 0x20ac, # EURO SIGN + 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00aa: 0x0218, # LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00ae: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b2: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b5: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00b9: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ba: 0x0219, # LATIN SMALL LETTER S WITH COMMA BELOW + 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE + 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d7: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00d8: 0x0170, # LATIN CAPITAL LETTER 
U WITH DOUBLE ACUTE + 0x00dd: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00de: 0x021a, # LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f7: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00f8: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fd: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00fe: 0x021b, # LATIN SMALL LETTER T WITH COMMA BELOW }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + 
u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0x00a2 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0141' # 0x00a3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u20ac' # 0x00a4 -> EURO SIGN + u'\u201e' # 0x00a5 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0160' # 0x00a6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\u0161' # 0x00a8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u0218' # 0x00aa -> LATIN CAPITAL LETTER S WITH COMMA BELOW + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0179' # 0x00ac -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u017a' # 0x00ae -> LATIN SMALL LETTER Z WITH ACUTE + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u010c' # 0x00b2 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0142' # 0x00b3 -> LATIN SMALL LETTER L WITH STROKE + u'\u017d' # 0x00b4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u201d' # 0x00b5 -> RIGHT DOUBLE QUOTATION MARK + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u017e' # 0x00b8 -> LATIN SMALL LETTER Z WITH CARON + u'\u010d' # 0x00b9 -> LATIN SMALL LETTER C WITH CARON + u'\u0219' # 0x00ba -> LATIN SMALL LETTER S WITH COMMA BELOW + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0x00bc -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00bd -> LATIN SMALL LIGATURE OE + u'\u0178' # 0x00be -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A 
WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0x00c5 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0x00d5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015a' # 0x00d7 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0170' # 0x00d8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0118' # 0x00dd -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u021a' # 0x00de -> LATIN CAPITAL LETTER T WITH COMMA BELOW + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0107' # 
0x00e5 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0x00f5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u015b' # 0x00f7 -> LATIN SMALL LETTER S WITH ACUTE + u'\u0171' # 0x00f8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0119' # 0x00fd -> LATIN SMALL LETTER E WITH OGONEK + u'\u021b' # 0x00fe -> LATIN SMALL LETTER T WITH COMMA BELOW + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL 
TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # 
LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER 
V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN 
CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 
0x00a1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00a2, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c5, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e5, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00b2, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00b9, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00dd, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00fd, # LATIN SMALL LETTER E WITH OGONEK + 0x0141: 0x00a3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00b3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0150: 0x00d5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x00f5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0152: 0x00bc, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00bd, # LATIN SMALL LIGATURE OE + 0x015a: 0x00d7, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00f7, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00a6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00a8, # LATIN SMALL LETTER S WITH CARON + 0x0170: 0x00d8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00f8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0178: 0x00be, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0179: 0x00ac, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ae, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00b4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00b8, # LATIN SMALL LETTER Z WITH CARON + 0x0218: 0x00aa, # LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x0219: 0x00ba, # LATIN SMALL LETTER S WITH COMMA BELOW + 0x021a: 0x00de, # LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x021b: 0x00fe, # LATIN SMALL LETTER T WITH COMMA BELOW + 0x201d: 0x00b5, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00a5, # DOUBLE LOW-9 QUOTATION MARK + 
0x20ac: 0x00a4, # EURO SIGN +} \ No newline at end of file Index: iso8859_2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_2.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_2.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_2.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-2.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,65 +32,583 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x02d8, # BREVE - 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00b7: 0x02c7, # CARON - 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA 
- 0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 
0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x02d8, # BREVE + 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00b7: 0x02c7, # CARON + 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 
0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + 
u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u02d8' # 0x00a2 -> BREVE + u'\u0141' # 0x00a3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\u013d' # 0x00a5 -> LATIN CAPITAL LETTER L WITH CARON + u'\u015a' # 0x00a6 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\u0160' # 0x00a9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u015e' # 0x00aa -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u0164' # 0x00ab -> LATIN CAPITAL LETTER T WITH CARON + u'\u0179' # 0x00ac -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u017d' # 0x00ae -> LATIN CAPITAL LETTER Z 
WITH CARON + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\u0105' # 0x00b1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0x00b2 -> OGONEK + u'\u0142' # 0x00b3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\u013e' # 0x00b5 -> LATIN SMALL LETTER L WITH CARON + u'\u015b' # 0x00b6 -> LATIN SMALL LETTER S WITH ACUTE + u'\u02c7' # 0x00b7 -> CARON + u'\xb8' # 0x00b8 -> CEDILLA + u'\u0161' # 0x00b9 -> LATIN SMALL LETTER S WITH CARON + u'\u015f' # 0x00ba -> LATIN SMALL LETTER S WITH CEDILLA + u'\u0165' # 0x00bb -> LATIN SMALL LETTER T WITH CARON + u'\u017a' # 0x00bc -> LATIN SMALL LETTER Z WITH ACUTE + u'\u02dd' # 0x00bd -> DOUBLE ACUTE ACCENT + u'\u017e' # 0x00be -> LATIN SMALL LETTER Z WITH CARON + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0x00c0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0x00c3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0x00c5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0x00c6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0x00cc -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0x00cf -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0x00d1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0x00d2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O 
WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0x00d5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u0158' # 0x00d8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0x00d9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0x00db -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0x00dd -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0x00de -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0x00e0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0x00e3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0x00e5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0x00e6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0x00ec -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0x00ef -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0x00f1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00f2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0x00f5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH 
DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u0159' # 0x00f8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0x00f9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0x00fd -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0x00fe -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT 
PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT 
+ 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY 
SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b8: 0x00b8, # CEDILLA + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00e3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a1, # LATIN 
CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b1, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x00c6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x00e6, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00cf, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00ef, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00ca, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00cc, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00ec, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0x00c5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x00e5, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x00a5, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x00b5, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0x00a3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x00b3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00d1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00f1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00f2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x00d5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x00f5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00c0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00e0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0x00d8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00f8, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x00a6, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x00b6, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00aa, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ba, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x00a9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00b9, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00de, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00fe, # LATIN SMALL 
LETTER T WITH CEDILLA + 0x0164: 0x00ab, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x00bb, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00d9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x00f9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0x00db, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x00ac, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00bc, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00ae, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00be, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00b7, # CARON + 0x02d8: 0x00a2, # BREVE + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x00b2, # OGONEK + 0x02dd: 0x00bd, # DOUBLE ACUTE ACCENT +} \ No newline at end of file Index: iso8859_3.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_3.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_3.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_3.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-3.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,43 +32,554 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE - 0x00a2: 0x02d8, # BREVE - 0x00a5: None, - 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX - 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX - 0x00ae: None, - 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE - 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX - 0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX - 0x00be: None, - 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00c3: None, - 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX - 0x00d0: None, - 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX - 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE - 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX - 0x00e3: None, - 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX - 0x00f0: None, - 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX - 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE - 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0126, # LATIN CAPITAL 
LETTER H WITH STROKE + 0x00a2: 0x02d8, # BREVE + 0x00a5: None, + 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x00ae: None, + 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE + 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x00be: None, + 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c3: None, + 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x00d0: None, + 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE + 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x00e3: None, + 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x00f0: None, + 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX + 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE + 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a 
-> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0126' # 0x00a1 -> LATIN CAPITAL LETTER H WITH STROKE + u'\u02d8' # 0x00a2 -> BREVE + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufffe' + u'\u0124' # 0x00a6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\u0130' # 0x00a9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0x00aa -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u011e' # 0x00ab -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0134' # 0x00ac -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\ufffe' + u'\u017b' # 0x00af -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0x00b0 -> DEGREE SIGN 
+ u'\u0127' # 0x00b1 -> LATIN SMALL LETTER H WITH STROKE + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u0125' # 0x00b6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\u0131' # 0x00b9 -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0x00ba -> LATIN SMALL LETTER S WITH CEDILLA + u'\u011f' # 0x00bb -> LATIN SMALL LETTER G WITH BREVE + u'\u0135' # 0x00bc -> LATIN SMALL LETTER J WITH CIRCUMFLEX + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\ufffe' + u'\u017c' # 0x00bf -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\ufffe' + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u010a' # 0x00c5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u0108' # 0x00c6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\ufffe' + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0120' # 0x00d5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\u011c' # 0x00d8 -> 
LATIN CAPITAL LETTER G WITH CIRCUMFLEX + u'\xd9' # 0x00d9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u016c' # 0x00dd -> LATIN CAPITAL LETTER U WITH BREVE + u'\u015c' # 0x00de -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\ufffe' + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010b' # 0x00e5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u0109' # 0x00e6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\ufffe' + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0121' # 0x00f5 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\u011d' # 0x00f8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u016d' # 0x00fd 
-> LATIN SMALL LETTER U WITH BREVE + u'\u015d' # 0x00fe -> LATIN SMALL LETTER S WITH CIRCUMFLEX + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 
0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 
0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # 
CEDILLA + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL 
LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0108: 0x00c6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x0109: 0x00e6, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x010a: 0x00c5, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010b: 0x00e5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x011c: 0x00d8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x011d: 0x00f8, # LATIN SMALL LETTER G WITH CIRCUMFLEX + 0x011e: 0x00ab, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00bb, # LATIN SMALL LETTER G WITH BREVE + 0x0120: 0x00d5, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0x00f5, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0124: 0x00a6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x0125: 0x00b6, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x0126: 0x00a1, # LATIN CAPITAL LETTER H WITH STROKE + 0x0127: 0x00b1, # LATIN SMALL LETTER H WITH STROKE + 0x0130: 0x00a9, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00b9, # LATIN SMALL LETTER DOTLESS I + 0x0134: 0x00ac, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x0135: 0x00bc, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x015c: 0x00de, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x015d: 0x00fe, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x015e: 0x00aa, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ba, # LATIN SMALL LETTER S WITH CEDILLA + 0x016c: 0x00dd, # LATIN CAPITAL LETTER U WITH BREVE + 0x016d: 0x00fd, # LATIN SMALL LETTER U WITH BREVE + 0x017b: 0x00af, # LATIN CAPITAL LETTER Z WITH DOT 
ABOVE + 0x017c: 0x00bf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x02d8: 0x00a2, # BREVE + 0x02d9: 0x00ff, # DOT ABOVE +} \ No newline at end of file Index: iso8859_4.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_4.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_4.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_4.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-4.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,58 +32,576 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a2: 0x0138, # LATIN SMALL LETTER KRA - 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE - 0x00a6: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE - 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00b2: 0x02db, # OGONEK - 0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE - 0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00b7: 0x02c7, # CARON - 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH 
CARON - 0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE - 0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG - 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00bf: 0x014b, # LATIN SMALL LETTER ENG - 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE - 0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE - 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00ff: 0x02d9, # DOT ABOVE + 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a2: 0x0138, # LATIN SMALL LETTER KRA + 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE + 0x00a6: 0x013b, # LATIN CAPITAL LETTER L 
WITH CEDILLA + 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE + 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00b2: 0x02db, # OGONEK + 0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE + 0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00b7: 0x02c7, # CARON + 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE + 0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG + 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00bf: 0x014b, # LATIN SMALL LETTER ENG + 0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE + 0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 
0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE + 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00ff: 0x02d9, # DOT ABOVE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0104' # 0x00a1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0138' # 0x00a2 -> LATIN SMALL LETTER KRA + u'\u0156' # 0x00a3 -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xa4' 
# 0x00a4 -> CURRENCY SIGN + u'\u0128' # 0x00a5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u013b' # 0x00a6 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\u0160' # 0x00a9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0112' # 0x00aa -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0x00ab -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0166' # 0x00ac -> LATIN CAPITAL LETTER T WITH STROKE + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u017d' # 0x00ae -> LATIN CAPITAL LETTER Z WITH CARON + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\u0105' # 0x00b1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0x00b2 -> OGONEK + u'\u0157' # 0x00b3 -> LATIN SMALL LETTER R WITH CEDILLA + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\u0129' # 0x00b5 -> LATIN SMALL LETTER I WITH TILDE + u'\u013c' # 0x00b6 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u02c7' # 0x00b7 -> CARON + u'\xb8' # 0x00b8 -> CEDILLA + u'\u0161' # 0x00b9 -> LATIN SMALL LETTER S WITH CARON + u'\u0113' # 0x00ba -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0x00bb -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0167' # 0x00bc -> LATIN SMALL LETTER T WITH STROKE + u'\u014a' # 0x00bd -> LATIN CAPITAL LETTER ENG + u'\u017e' # 0x00be -> LATIN SMALL LETTER Z WITH CARON + u'\u014b' # 0x00bf -> LATIN SMALL LETTER ENG + u'\u0100' # 0x00c0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0x00c7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0x00ca -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0x00cb -> LATIN 
CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0x00cc -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u012a' # 0x00cf -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0145' # 0x00d1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0x00d2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0136' # 0x00d3 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0x00d9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0168' # 0x00dd -> LATIN CAPITAL LETTER U WITH TILDE + u'\u016a' # 0x00de -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\u0101' # 0x00e0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\u012f' # 0x00e7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0x00ea -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0x00ec -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN 
SMALL LETTER I WITH CIRCUMFLEX + u'\u012b' # 0x00ef -> LATIN SMALL LETTER I WITH MACRON + u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0146' # 0x00f1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0x00f2 -> LATIN SMALL LETTER O WITH MACRON + u'\u0137' # 0x00f3 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0x00f9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0169' # 0x00fd -> LATIN SMALL LETTER U WITH TILDE + u'\u016b' # 0x00fe -> LATIN SMALL LETTER U WITH MACRON + u'\u02d9' # 0x00ff -> DOT ABOVE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, 
# ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN 
CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 
0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b8: 0x00b8, # CEDILLA + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING 
ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00c0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x00e0, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00a1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00b1, # LATIN SMALL LETTER A WITH OGONEK + 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0x00aa, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x00ba, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00cc, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00ec, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00ca, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00ea, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x00ab, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x00bb, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0x00a5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0x00b5, # LATIN SMALL LETTER I WITH TILDE + 0x012a: 0x00cf, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x00ef, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00c7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00e7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00d3, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00f3, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 
0x00a2, # LATIN SMALL LETTER KRA + 0x013b: 0x00a6, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00b6, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0x00d1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00f1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014a: 0x00bd, # LATIN CAPITAL LETTER ENG + 0x014b: 0x00bf, # LATIN SMALL LETTER ENG + 0x014c: 0x00d2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x00f2, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x00a3, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x00b3, # LATIN SMALL LETTER R WITH CEDILLA + 0x0160: 0x00a9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00b9, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0x00ac, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0x00bc, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0x00dd, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0x00fd, # LATIN SMALL LETTER U WITH TILDE + 0x016a: 0x00de, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00fe, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00d9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00f9, # LATIN SMALL LETTER U WITH OGONEK + 0x017d: 0x00ae, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00be, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00b7, # CARON + 0x02d9: 0x00ff, # DOT ABOVE + 0x02db: 0x00b2, # OGONEK +} \ No newline at end of file Index: iso8859_5.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_5.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_5.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_5.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-5.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,102 +32,620 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x00ae: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ba: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00c4: 
0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00d0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00d8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00da: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00db: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00de: 0x043e, # CYRILLIC SMALL LETTER O - 0x00df: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x2116, # NUMERO SIGN - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x00ae: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00ba: 0x041a, # CYRILLIC 
CAPITAL LETTER KA + 0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00c4: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00d0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00d8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00da: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00db: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00de: 0x043e, # CYRILLIC SMALL LETTER O + 0x00df: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # 
CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x2116, # NUMERO SIGN + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + 
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u0401' # 0x00a1 -> CYRILLIC CAPITAL LETTER IO + u'\u0402' # 0x00a2 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0x00a3 -> CYRILLIC CAPITAL LETTER GJE + u'\u0404' # 0x00a4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0405' # 0x00a5 -> CYRILLIC CAPITAL LETTER DZE + u'\u0406' # 0x00a6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0x00a7 -> CYRILLIC CAPITAL LETTER YI + u'\u0408' # 0x00a8 -> CYRILLIC CAPITAL LETTER JE + u'\u0409' # 0x00a9 -> CYRILLIC CAPITAL LETTER LJE + u'\u040a' # 0x00aa -> CYRILLIC CAPITAL LETTER NJE + u'\u040b' # 0x00ab -> CYRILLIC CAPITAL LETTER TSHE + u'\u040c' # 0x00ac -> CYRILLIC CAPITAL LETTER KJE + u'\xad' # 0x00ad -> SOFT HYPHEN 
+ u'\u040e' # 0x00ae -> CYRILLIC CAPITAL LETTER SHORT U + u'\u040f' # 0x00af -> CYRILLIC CAPITAL LETTER DZHE + u'\u0410' # 0x00b0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x00b1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x00b2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x00b3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x00b4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x00b5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x00b6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x00b7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x00b9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x00ba -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x00bb -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x00bc -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x00bd -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x00be -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x00bf -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x00c0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x00c1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x00c2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x00c3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x00c4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x00c5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x00c6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x00c7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x00c8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x00c9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x00ca -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x00cb -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x00cc -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x00cd -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x00ce -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x00cf -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00d0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00d1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00d2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00d3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00d4 -> CYRILLIC SMALL 
LETTER DE + u'\u0435' # 0x00d5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00d6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00d7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00d8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00d9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00da -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00db -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00dc -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00dd -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00de -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00df -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA + u'\u2116' # 0x00f0 -> NUMERO SIGN + u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO + u'\u0452' # 0x00f2 -> CYRILLIC SMALL LETTER DJE + u'\u0453' # 0x00f3 -> CYRILLIC SMALL LETTER GJE + u'\u0454' # 0x00f4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0455' # 0x00f5 -> CYRILLIC SMALL LETTER DZE + u'\u0456' # 0x00f6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0x00f7 -> CYRILLIC SMALL LETTER YI + u'\u0458' # 0x00f8 -> CYRILLIC SMALL LETTER JE + u'\u0459' # 0x00f9 -> CYRILLIC SMALL LETTER LJE + u'\u045a' # 0x00fa -> CYRILLIC SMALL LETTER NJE + u'\u045b' # 0x00fb -> CYRILLIC SMALL LETTER TSHE + u'\u045c' # 0x00fc -> 
CYRILLIC SMALL LETTER KJE + u'\xa7' # 0x00fd -> SECTION SIGN + u'\u045e' # 0x00fe -> CYRILLIC SMALL LETTER SHORT U + u'\u045f' # 0x00ff -> CYRILLIC SMALL LETTER DZHE +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 
0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 
0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a7: 0x00fd, # SECTION SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x0401: 0x00a1, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x00a2, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x00a3, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x00a4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x00a5, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x00a6, # 
CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x00a7, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x00a8, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x00a9, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x00aa, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x00ab, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x00ac, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x00ae, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x00af, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00b0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00b1, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00b2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00b3, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00b4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00b5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00b6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00b7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00b9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00ba, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00bb, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00bc, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00bd, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00be, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00bf, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00c0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00c1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00c2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00c3, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00c4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00c5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00c6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00c7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00c8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00c9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x00ca, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00cb, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00cc, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00cd, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x00ce, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00cf, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00d0, # CYRILLIC SMALL 
LETTER A + 0x0431: 0x00d1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00d2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00d3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00d4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00d5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00d6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00d7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00d8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00d9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00da, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00db, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00dc, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00dd, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00de, # CYRILLIC SMALL LETTER O + 0x043f: 0x00df, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E + 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO + 0x0452: 0x00f2, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x00f3, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x00f4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x00f5, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x00f6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x00f7, # CYRILLIC SMALL LETTER YI + 0x0458: 0x00f8, # CYRILLIC SMALL LETTER JE + 0x0459: 0x00f9, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x00fa, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x00fb, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x00fc, # CYRILLIC SMALL 
LETTER KJE + 0x045e: 0x00fe, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x00ff, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0x00f0, # NUMERO SIGN +} \ No newline at end of file Index: iso8859_6.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_6.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_6.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_6.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-6.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,101 +32,574 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: None, - 0x00a2: None, - 0x00a3: None, - 0x00a5: None, - 0x00a6: None, - 0x00a7: None, - 0x00a8: None, - 0x00a9: None, - 0x00aa: None, - 0x00ab: None, - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ae: None, - 0x00af: None, - 0x00b0: None, - 0x00b1: None, - 0x00b2: None, - 0x00b3: None, - 0x00b4: None, - 0x00b5: None, - 0x00b6: None, - 0x00b7: None, - 0x00b8: None, - 0x00b9: None, - 0x00ba: None, - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: None, - 0x00bd: None, - 0x00be: None, - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: None, - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # 
ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d7: 0x0637, # ARABIC LETTER TAH - 0x00d8: 0x0638, # ARABIC LETTER ZAH - 0x00d9: 0x0639, # ARABIC LETTER AIN - 0x00da: 0x063a, # ARABIC LETTER GHAIN - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: None, - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0x0641, # ARABIC LETTER FEH - 0x00e2: 0x0642, # ARABIC LETTER QAF - 0x00e3: 0x0643, # ARABIC LETTER KAF - 0x00e4: 0x0644, # ARABIC LETTER LAM - 0x00e5: 0x0645, # ARABIC LETTER MEEM - 0x00e6: 0x0646, # ARABIC LETTER NOON - 0x00e7: 0x0647, # ARABIC LETTER HEH - 0x00e8: 0x0648, # ARABIC LETTER WAW - 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ea: 0x064a, # ARABIC LETTER YEH - 0x00eb: 0x064b, # ARABIC FATHATAN - 0x00ec: 0x064c, # ARABIC DAMMATAN - 0x00ed: 0x064d, # ARABIC KASRATAN - 0x00ee: 0x064e, # ARABIC FATHA - 0x00ef: 0x064f, # ARABIC DAMMA - 0x00f0: 0x0650, # ARABIC KASRA - 0x00f1: 0x0651, # ARABIC SHADDA - 0x00f2: 0x0652, # ARABIC SUKUN - 0x00f3: None, - 0x00f4: None, - 0x00f5: None, - 0x00f6: None, - 0x00f7: None, - 0x00f8: None, - 0x00f9: None, - 0x00fa: None, - 0x00fb: None, - 0x00fc: None, - 0x00fd: None, - 0x00fe: None, - 0x00ff: None, + 0x00a1: None, + 0x00a2: None, + 0x00a3: None, + 0x00a5: None, + 0x00a6: None, + 0x00a7: None, + 0x00a8: None, + 0x00a9: None, + 0x00aa: None, + 0x00ab: None, + 0x00ac: 0x060c, # ARABIC COMMA + 
0x00ae: None, + 0x00af: None, + 0x00b0: None, + 0x00b1: None, + 0x00b2: None, + 0x00b3: None, + 0x00b4: None, + 0x00b5: None, + 0x00b6: None, + 0x00b7: None, + 0x00b8: None, + 0x00b9: None, + 0x00ba: None, + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: None, + 0x00bd: None, + 0x00be: None, + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: None, + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d7: 0x0637, # ARABIC LETTER TAH + 0x00d8: 0x0638, # ARABIC LETTER ZAH + 0x00d9: 0x0639, # ARABIC LETTER AIN + 0x00da: 0x063a, # ARABIC LETTER GHAIN + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: None, + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0x0641, # ARABIC LETTER FEH + 0x00e2: 0x0642, # ARABIC LETTER QAF + 0x00e3: 0x0643, # ARABIC LETTER KAF + 0x00e4: 0x0644, # ARABIC LETTER LAM + 0x00e5: 0x0645, # ARABIC LETTER MEEM + 0x00e6: 0x0646, # ARABIC LETTER NOON + 0x00e7: 0x0647, # ARABIC LETTER HEH + 0x00e8: 0x0648, # ARABIC LETTER WAW + 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ea: 0x064a, # ARABIC LETTER YEH + 0x00eb: 0x064b, # ARABIC FATHATAN + 0x00ec: 
0x064c, # ARABIC DAMMATAN + 0x00ed: 0x064d, # ARABIC KASRATAN + 0x00ee: 0x064e, # ARABIC FATHA + 0x00ef: 0x064f, # ARABIC DAMMA + 0x00f0: 0x0650, # ARABIC KASRA + 0x00f1: 0x0651, # ARABIC SHADDA + 0x00f2: 0x0652, # ARABIC SUKUN + 0x00f3: None, + 0x00f4: None, + 0x00f5: None, + 0x00f6: None, + 0x00f7: None, + 0x00f8: None, + 0x00f9: None, + 0x00fa: None, + 0x00fb: None, + 0x00fc: None, + 0x00fd: None, + 0x00fe: None, + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + 
u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u060c' # 0x00ac -> ARABIC COMMA + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\ufffe' + u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA + u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF + u'\u0628' # 0x00c8 -> ARABIC LETTER BEH + u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0x00ca -> ARABIC LETTER TEH + u'\u062b' # 0x00cb -> ARABIC LETTER THEH + u'\u062c' # 0x00cc -> ARABIC LETTER JEEM + u'\u062d' # 0x00cd -> ARABIC LETTER HAH + u'\u062e' # 0x00ce -> ARABIC LETTER KHAH + u'\u062f' # 0x00cf -> ARABIC LETTER DAL + u'\u0630' # 0x00d0 -> ARABIC LETTER THAL + u'\u0631' # 0x00d1 -> ARABIC LETTER REH + u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN + u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN + u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN + u'\u0635' # 0x00d5 -> ARABIC LETTER SAD + u'\u0636' # 0x00d6 -> ARABIC LETTER DAD + u'\u0637' # 0x00d7 -> ARABIC LETTER TAH + u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH + u'\u0639' # 0x00d9 -> ARABIC LETTER AIN + u'\u063a' # 0x00da -> ARABIC LETTER GHAIN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' 
+ u'\ufffe' + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\u0641' # 0x00e1 -> ARABIC LETTER FEH + u'\u0642' # 0x00e2 -> ARABIC LETTER QAF + u'\u0643' # 0x00e3 -> ARABIC LETTER KAF + u'\u0644' # 0x00e4 -> ARABIC LETTER LAM + u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM + u'\u0646' # 0x00e6 -> ARABIC LETTER NOON + u'\u0647' # 0x00e7 -> ARABIC LETTER HEH + u'\u0648' # 0x00e8 -> ARABIC LETTER WAW + u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0x00ea -> ARABIC LETTER YEH + u'\u064b' # 0x00eb -> ARABIC FATHATAN + u'\u064c' # 0x00ec -> ARABIC DAMMATAN + u'\u064d' # 0x00ed -> ARABIC KASRATAN + u'\u064e' # 0x00ee -> ARABIC FATHA + u'\u064f' # 0x00ef -> ARABIC DAMMA + u'\u0650' # 0x00f0 -> ARABIC KASRA + u'\u0651' # 0x00f1 -> ARABIC SHADDA + u'\u0652' # 0x00f2 -> ARABIC SUKUN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 
0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL 
LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # 
+ 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0621: 0x00c1, # ARABIC LETTER HAMZA + 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0x00c7, # ARABIC LETTER ALEF + 0x0628: 0x00c8, # ARABIC LETTER BEH + 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA + 0x062a: 0x00ca, # ARABIC LETTER TEH + 0x062b: 0x00cb, # ARABIC LETTER THEH + 0x062c: 0x00cc, # ARABIC LETTER JEEM + 0x062d: 0x00cd, # ARABIC LETTER HAH + 0x062e: 0x00ce, # ARABIC LETTER KHAH + 0x062f: 0x00cf, # ARABIC LETTER DAL + 0x0630: 0x00d0, # ARABIC LETTER THAL + 0x0631: 0x00d1, # ARABIC LETTER REH + 0x0632: 0x00d2, # ARABIC LETTER ZAIN + 0x0633: 0x00d3, # ARABIC LETTER SEEN + 0x0634: 0x00d4, # ARABIC LETTER SHEEN + 0x0635: 0x00d5, # ARABIC LETTER SAD + 0x0636: 0x00d6, # ARABIC LETTER DAD + 0x0637: 0x00d7, # ARABIC LETTER TAH + 0x0638: 0x00d8, # ARABIC LETTER ZAH + 0x0639: 0x00d9, # ARABIC LETTER AIN + 0x063a: 0x00da, # ARABIC LETTER GHAIN + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0641: 0x00e1, # ARABIC LETTER FEH + 0x0642: 0x00e2, # ARABIC LETTER QAF + 0x0643: 0x00e3, # ARABIC LETTER KAF + 0x0644: 0x00e4, # ARABIC LETTER LAM + 0x0645: 0x00e5, # ARABIC LETTER MEEM + 0x0646: 0x00e6, # ARABIC LETTER NOON + 0x0647: 0x00e7, # 
ARABIC LETTER HEH + 0x0648: 0x00e8, # ARABIC LETTER WAW + 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA + 0x064a: 0x00ea, # ARABIC LETTER YEH + 0x064b: 0x00eb, # ARABIC FATHATAN + 0x064c: 0x00ec, # ARABIC DAMMATAN + 0x064d: 0x00ed, # ARABIC KASRATAN + 0x064e: 0x00ee, # ARABIC FATHA + 0x064f: 0x00ef, # ARABIC DAMMA + 0x0650: 0x00f0, # ARABIC KASRA + 0x0651: 0x00f1, # ARABIC SHADDA + 0x0652: 0x00f2, # ARABIC SUKUN +} \ No newline at end of file Index: iso8859_7.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_7.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_7.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_7.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-7.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,88 +32,603 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00a4: None, - 0x00a5: None, - 0x00aa: None, - 0x00ae: None, - 0x00af: 0x2015, # HORIZONTAL BAR - 0x00b4: 0x0384, # GREEK TONOS - 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS - 0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU - 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU - 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI - 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00d2: None, - 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d5: 0x03a5, # GREEK 
CAPITAL LETTER UPSILON - 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00ec: 0x03bc, # GREEK SMALL LETTER MU - 0x00ed: 0x03bd, # GREEK SMALL LETTER NU - 0x00ee: 0x03be, # GREEK SMALL LETTER XI - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00fe: 0x03ce, # GREEK 
SMALL LETTER OMEGA WITH TONOS - 0x00ff: None, + 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a4: 0x20ac, # EURO SIGN + 0x00a5: 0x20af, # DRACHMA SIGN + 0x00aa: 0x037a, # GREEK YPOGEGRAMMENI + 0x00ae: None, + 0x00af: 0x2015, # HORIZONTAL BAR + 0x00b4: 0x0384, # GREEK TONOS + 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS + 0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00cc: 0x039c, # GREEK CAPITAL LETTER MU + 0x00cd: 0x039d, # GREEK CAPITAL LETTER NU + 0x00ce: 0x039e, # GREEK CAPITAL LETTER XI + 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, + 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00da: 0x03aa, # GREEK 
CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e7: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e8: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ec: 0x03bc, # GREEK SMALL LETTER MU + 0x00ed: 0x03bd, # GREEK SMALL LETTER NU + 0x00ee: 0x03be, # GREEK SMALL LETTER XI + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f7: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f8: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + 
u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\u2018' # 0x00a1 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00a2 -> RIGHT SINGLE QUOTATION MARK + u'\xa3' # 0x00a3 -> POUND SIGN + u'\u20ac' # 0x00a4 -> EURO SIGN + u'\u20af' # 0x00a5 -> DRACHMA SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u037a' # 0x00aa -> GREEK YPOGEGRAMMENI + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\ufffe' + u'\u2015' # 0x00af -> HORIZONTAL BAR + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + 
u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\u0384' # 0x00b4 -> GREEK TONOS + u'\u0385' # 0x00b5 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0x00b6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\u0388' # 0x00b8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00b9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ba -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0x00bc -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\u038e' # 0x00be -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00bf -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0x00c0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0x00c1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00c2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00c3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00c4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00c5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00c6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00c7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x00c8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00c9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x00ca -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00cb -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00cc -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00cd -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x00ce -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00cf -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x00d0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00d1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' + u'\u03a3' # 0x00d3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d9 -> GREEK CAPITAL LETTER OMEGA + 
u'\u03aa' # 0x00da -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00db -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0x00dc -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00dd -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00de -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x00df -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0x00e0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0x00e1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00e2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00e3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x00e4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00e5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x00e6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00ea -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00eb -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00ec -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00ed -> GREEK SMALL LETTER NU + u'\u03be' # 0x00ee -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00ef -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00f0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00f1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0x00f2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0x00f3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0x00f4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00f5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00f8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0x00f9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0x00fa -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0x00fc -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00fd -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0x00fe -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' +) + ### Encoding 
Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 
0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN 
SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 
0x00bd, # VULGAR FRACTION ONE HALF + 0x037a: 0x00aa, # GREEK YPOGEGRAMMENI + 0x0384: 0x00b4, # GREEK TONOS + 0x0385: 0x00b5, # GREEK DIALYTIKA TONOS + 0x0386: 0x00b6, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00b8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00b9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ba, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00bc, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00be, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00bf, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00c0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00c1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00c2, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00c3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00c4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00c5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00c6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00c7, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00c8, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00c9, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00ca, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00cb, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00cc, # GREEK CAPITAL LETTER MU + 0x039d: 0x00cd, # GREEK CAPITAL LETTER NU + 0x039e: 0x00ce, # GREEK CAPITAL LETTER XI + 0x039f: 0x00cf, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00d0, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00d1, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00d3, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d4, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d5, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d6, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d7, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d8, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d9, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00da, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00db, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00dc, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00dd, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00de, # GREEK 
SMALL LETTER ETA WITH TONOS + 0x03af: 0x00df, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00e0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00e1, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00e2, # GREEK SMALL LETTER BETA + 0x03b3: 0x00e3, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00e4, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00e5, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e6, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e7, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e8, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e9, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00ea, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00eb, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00ec, # GREEK SMALL LETTER MU + 0x03bd: 0x00ed, # GREEK SMALL LETTER NU + 0x03be: 0x00ee, # GREEK SMALL LETTER XI + 0x03bf: 0x00ef, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00f0, # GREEK SMALL LETTER PI + 0x03c1: 0x00f1, # GREEK SMALL LETTER RHO + 0x03c2: 0x00f2, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00f3, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00f4, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f5, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f6, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f7, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f8, # GREEK SMALL LETTER PSI + 0x03c9: 0x00f9, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00fa, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00fc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00fd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fe, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0x00af, # HORIZONTAL BAR + 0x2018: 0x00a1, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00a2, # RIGHT SINGLE QUOTATION MARK + 0x20ac: 0x00a4, # EURO SIGN + 0x20af: 0x00a5, # DRACHMA SIGN +} \ No newline at end of file Index: iso8859_8.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_8.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d 
-r1.4 -r1.5 --- iso8859_8.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_8.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-8.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,76 +32,558 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00a1: None, - 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00ba: 0x00f7, # DIVISION SIGN - 0x00bf: None, - 0x00c0: None, - 0x00c1: None, - 0x00c2: None, - 0x00c3: None, - 0x00c4: None, - 0x00c5: None, - 0x00c6: None, - 0x00c7: None, - 0x00c8: None, - 0x00c9: None, - 0x00ca: None, - 0x00cb: None, - 0x00cc: None, - 0x00cd: None, - 0x00ce: None, - 0x00cf: None, - 0x00d0: None, - 0x00d1: None, - 0x00d2: None, - 0x00d3: None, - 0x00d4: None, - 0x00d5: None, - 0x00d6: None, - 0x00d7: None, - 0x00d8: None, - 0x00d9: None, - 0x00da: None, - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: 0x2017, # DOUBLE LOW LINE - 0x00e0: 0x05d0, # HEBREW LETTER ALEF - 0x00e1: 0x05d1, # HEBREW LETTER BET - 0x00e2: 0x05d2, # HEBREW LETTER GIMEL - 0x00e3: 0x05d3, # HEBREW LETTER DALET - 0x00e4: 0x05d4, # HEBREW LETTER HE - 0x00e5: 0x05d5, # HEBREW LETTER VAV - 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN - 0x00e7: 0x05d7, # HEBREW LETTER HET - 0x00e8: 0x05d8, # HEBREW LETTER TET - 0x00e9: 0x05d9, # HEBREW LETTER YOD - 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF - 0x00eb: 0x05db, # HEBREW LETTER KAF - 0x00ec: 0x05dc, # HEBREW LETTER LAMED - 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM - 0x00ee: 0x05de, # HEBREW 
LETTER MEM - 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN - 0x00f0: 0x05e0, # HEBREW LETTER NUN - 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH - 0x00f2: 0x05e2, # HEBREW LETTER AYIN - 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE - 0x00f4: 0x05e4, # HEBREW LETTER PE - 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x00f6: 0x05e6, # HEBREW LETTER TSADI - 0x00f7: 0x05e7, # HEBREW LETTER QOF - 0x00f8: 0x05e8, # HEBREW LETTER RESH - 0x00f9: 0x05e9, # HEBREW LETTER SHIN - 0x00fa: 0x05ea, # HEBREW LETTER TAV - 0x00fb: None, - 0x00fc: None, - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK - 0x00ff: None, + 0x00a1: None, + 0x00aa: 0x00d7, # MULTIPLICATION SIGN + 0x00ba: 0x00f7, # DIVISION SIGN + 0x00bf: None, + 0x00c0: None, + 0x00c1: None, + 0x00c2: None, + 0x00c3: None, + 0x00c4: None, + 0x00c5: None, + 0x00c6: None, + 0x00c7: None, + 0x00c8: None, + 0x00c9: None, + 0x00ca: None, + 0x00cb: None, + 0x00cc: None, + 0x00cd: None, + 0x00ce: None, + 0x00cf: None, + 0x00d0: None, + 0x00d1: None, + 0x00d2: None, + 0x00d3: None, + 0x00d4: None, + 0x00d5: None, + 0x00d6: None, + 0x00d7: None, + 0x00d8: None, + 0x00d9: None, + 0x00da: None, + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: 0x2017, # DOUBLE LOW LINE + 0x00e0: 0x05d0, # HEBREW LETTER ALEF + 0x00e1: 0x05d1, # HEBREW LETTER BET + 0x00e2: 0x05d2, # HEBREW LETTER GIMEL + 0x00e3: 0x05d3, # HEBREW LETTER DALET + 0x00e4: 0x05d4, # HEBREW LETTER HE + 0x00e5: 0x05d5, # HEBREW LETTER VAV + 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN + 0x00e7: 0x05d7, # HEBREW LETTER HET + 0x00e8: 0x05d8, # HEBREW LETTER TET + 0x00e9: 0x05d9, # HEBREW LETTER YOD + 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF + 0x00eb: 0x05db, # HEBREW LETTER KAF + 0x00ec: 0x05dc, # HEBREW LETTER LAMED + 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM + 0x00ee: 0x05de, # HEBREW LETTER MEM + 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN + 0x00f0: 0x05e0, # HEBREW LETTER NUN + 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH + 0x00f2: 0x05e2, # HEBREW 
LETTER AYIN + 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE + 0x00f4: 0x05e4, # HEBREW LETTER PE + 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x00f6: 0x05e6, # HEBREW LETTER TSADI + 0x00f7: 0x05e7, # HEBREW LETTER QOF + 0x00f8: 0x05e8, # HEBREW LETTER RESH + 0x00f9: 0x05e9, # HEBREW LETTER SHIN + 0x00fa: 0x05ea, # HEBREW LETTER TAV + 0x00fb: None, + 0x00fc: None, + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: None, }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + 
u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\ufffe' + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xd7' # 0x00aa -> MULTIPLICATION SIGN + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xf7' # 0x00ba -> DIVISION SIGN + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u2017' # 0x00df -> DOUBLE LOW LINE + u'\u05d0' # 0x00e0 -> HEBREW LETTER ALEF + u'\u05d1' # 0x00e1 -> HEBREW LETTER BET + u'\u05d2' # 0x00e2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x00e3 -> HEBREW LETTER DALET + u'\u05d4' # 0x00e4 -> HEBREW LETTER HE + 
u'\u05d5' # 0x00e5 -> HEBREW LETTER VAV + u'\u05d6' # 0x00e6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x00e7 -> HEBREW LETTER HET + u'\u05d8' # 0x00e8 -> HEBREW LETTER TET + u'\u05d9' # 0x00e9 -> HEBREW LETTER YOD + u'\u05da' # 0x00ea -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x00eb -> HEBREW LETTER KAF + u'\u05dc' # 0x00ec -> HEBREW LETTER LAMED + u'\u05dd' # 0x00ed -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x00ee -> HEBREW LETTER MEM + u'\u05df' # 0x00ef -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x00f0 -> HEBREW LETTER NUN + u'\u05e1' # 0x00f1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x00f2 -> HEBREW LETTER AYIN + u'\u05e3' # 0x00f3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x00f4 -> HEBREW LETTER PE + u'\u05e5' # 0x00f5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x00f6 -> HEBREW LETTER TSADI + u'\u05e7' # 0x00f7 -> HEBREW LETTER QOF + u'\u05e8' # 0x00f8 -> HEBREW LETTER RESH + u'\u05e9' # 0x00f9 -> HEBREW LETTER SHIN + u'\u05ea' # 0x00fa -> HEBREW LETTER TAV + u'\ufffe' + u'\ufffe' + u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK + u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK + u'\ufffe' +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END 
OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # 
LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, 
# + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0x00aa, # MULTIPLICATION SIGN + 0x00f7: 0x00ba, # DIVISION SIGN + 0x05d0: 0x00e0, # HEBREW LETTER ALEF + 0x05d1: 0x00e1, # HEBREW LETTER BET + 0x05d2: 0x00e2, # HEBREW LETTER GIMEL + 0x05d3: 0x00e3, # HEBREW LETTER DALET + 0x05d4: 0x00e4, # HEBREW LETTER HE + 0x05d5: 0x00e5, # HEBREW LETTER VAV + 0x05d6: 0x00e6, # HEBREW LETTER ZAYIN + 0x05d7: 0x00e7, # HEBREW LETTER HET + 0x05d8: 0x00e8, # HEBREW LETTER TET + 0x05d9: 0x00e9, # 
HEBREW LETTER YOD + 0x05da: 0x00ea, # HEBREW LETTER FINAL KAF + 0x05db: 0x00eb, # HEBREW LETTER KAF + 0x05dc: 0x00ec, # HEBREW LETTER LAMED + 0x05dd: 0x00ed, # HEBREW LETTER FINAL MEM + 0x05de: 0x00ee, # HEBREW LETTER MEM + 0x05df: 0x00ef, # HEBREW LETTER FINAL NUN + 0x05e0: 0x00f0, # HEBREW LETTER NUN + 0x05e1: 0x00f1, # HEBREW LETTER SAMEKH + 0x05e2: 0x00f2, # HEBREW LETTER AYIN + 0x05e3: 0x00f3, # HEBREW LETTER FINAL PE + 0x05e4: 0x00f4, # HEBREW LETTER PE + 0x05e5: 0x00f5, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x00f6, # HEBREW LETTER TSADI + 0x05e7: 0x00f7, # HEBREW LETTER QOF + 0x05e8: 0x00f8, # HEBREW LETTER RESH + 0x05e9: 0x00f9, # HEBREW LETTER SHIN + 0x05ea: 0x00fa, # HEBREW LETTER TAV + 0x200e: 0x00fd, # LEFT-TO-RIGHT MARK + 0x200f: 0x00fe, # RIGHT-TO-LEFT MARK + 0x2017: 0x00df, # DOUBLE LOW LINE +} \ No newline at end of file Index: iso8859_9.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_9.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_9.py 8 Aug 2002 20:19:19 -0000 1.4 +++ iso8859_9.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'ISO8859/8859-9.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,14 +32,532 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> 
SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\x80' # 0x0080 -> + u'\x81' # 0x0081 -> + u'\x82' # 0x0082 -> + u'\x83' # 0x0083 -> + u'\x84' # 0x0084 -> + u'\x85' # 0x0085 -> + u'\x86' # 0x0086 -> + u'\x87' # 0x0087 -> + u'\x88' # 0x0088 -> + u'\x89' # 0x0089 -> + u'\x8a' # 0x008a -> + u'\x8b' # 0x008b -> + u'\x8c' # 0x008c -> + u'\x8d' # 0x008d -> + u'\x8e' # 0x008e -> + u'\x8f' # 0x008f -> + u'\x90' # 0x0090 -> + u'\x91' # 0x0091 -> + u'\x92' # 0x0092 -> + u'\x93' # 0x0093 -> + u'\x94' # 0x0094 -> + u'\x95' # 0x0095 -> + u'\x96' # 0x0096 -> + u'\x97' # 0x0097 -> + u'\x98' # 0x0098 -> + u'\x99' # 0x0099 -> + u'\x9a' # 0x009a -> + u'\x9b' # 0x009b -> + u'\x9c' # 0x009c -> + u'\x9d' # 0x009d -> + u'\x9e' # 0x009e -> + u'\x9f' # 0x009f -> + u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\xa1' # 0x00a1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa5' # 0x00a5 -> YEN SIGN + u'\xa6' # 0x00a6 -> BROKEN BAR + u'\xa7' # 0x00a7 -> SECTION SIGN + u'\xa8' # 0x00a8 -> DIAERESIS + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\xaa' # 0x00aa -> FEMININE ORDINAL INDICATOR + u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0x00ac -> NOT SIGN + u'\xad' # 0x00ad -> SOFT HYPHEN + u'\xae' # 0x00ae -> REGISTERED SIGN + u'\xaf' # 0x00af -> MACRON + u'\xb0' # 0x00b0 -> DEGREE SIGN + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO + u'\xb3' # 
0x00b3 -> SUPERSCRIPT THREE + u'\xb4' # 0x00b4 -> ACUTE ACCENT + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\xb6' # 0x00b6 -> PILCROW SIGN + u'\xb7' # 0x00b7 -> MIDDLE DOT + u'\xb8' # 0x00b8 -> CEDILLA + u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE + u'\xba' # 0x00ba -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0x00bf -> INVERTED QUESTION MARK + u'\xc0' # 0x00c0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x00c1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00c2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0x00c3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0x00c4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x00c5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0x00c6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0x00c7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0x00c8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0x00c9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x00ca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00cb -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0x00cc -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0x00cd -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00ce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00cf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0x00d0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0x00d1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0x00d2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0x00d3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00d4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0x00d5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0x00d6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0x00d7 -> MULTIPLICATION SIGN + u'\xd8' # 0x00d8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0x00d9 -> LATIN CAPITAL 
LETTER U WITH GRAVE + u'\xda' # 0x00da -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00db -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0x00dc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0x00dd -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0x00de -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0x00df -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0x00e0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x00e1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0x00e2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x00e3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0x00e4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0x00e5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0x00e6 -> LATIN SMALL LETTER AE + u'\xe7' # 0x00e7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0x00e8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0x00e9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x00ea -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x00eb -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0x00ed -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x00ee -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x00ef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0x00f0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0x00f1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0x00f2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0x00f3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0x00f4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x00f5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0x00f6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x00f7 -> DIVISION SIGN + u'\xf8' # 0x00f8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0x00f9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0x00fa -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0x00fb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x00fc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0x00fd -> LATIN SMALL 
LETTER DOTLESS I + u'\u015f' # 0x00fe -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0x00ff -> LATIN SMALL LETTER Y WITH DIAERESIS +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 
0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL 
LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x0080: 0x0080, # + 0x0081: 0x0081, # + 0x0082: 0x0082, # + 0x0083: 0x0083, # + 0x0084: 0x0084, # + 0x0085: 0x0085, # + 0x0086: 0x0086, # + 0x0087: 0x0087, # + 0x0088: 0x0088, # + 0x0089: 0x0089, # + 0x008a: 0x008a, # + 0x008b: 0x008b, # + 0x008c: 0x008c, # + 0x008d: 0x008d, # + 0x008e: 0x008e, # + 0x008f: 0x008f, # + 0x0090: 0x0090, # + 0x0091: 0x0091, # + 0x0092: 0x0092, # + 0x0093: 0x0093, # + 0x0094: 0x0094, # + 0x0095: 0x0095, # + 0x0096: 0x0096, # + 0x0097: 0x0097, # + 0x0098: 0x0098, # + 0x0099: 0x0099, # + 0x009a: 0x009a, # + 0x009b: 0x009b, # + 0x009c: 0x009c, # + 0x009d: 0x009d, # + 0x009e: 0x009e, # + 0x009f: 0x009f, # + 0x00a0: 0x00a0, # NO-BREAK SPACE + 0x00a1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a5: 0x00a5, # YEN SIGN + 0x00a6: 0x00a6, # BROKEN BAR + 0x00a7: 0x00a7, # SECTION SIGN + 0x00a8: 0x00a8, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ab, # 
LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00ac, # NOT SIGN + 0x00ad: 0x00ad, # SOFT HYPHEN + 0x00ae: 0x00ae, # REGISTERED SIGN + 0x00af: 0x00af, # MACRON + 0x00b0: 0x00b0, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x00b2, # SUPERSCRIPT TWO + 0x00b3: 0x00b3, # SUPERSCRIPT THREE + 0x00b4: 0x00b4, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00b6, # PILCROW SIGN + 0x00b7: 0x00b7, # MIDDLE DOT + 0x00b8: 0x00b8, # CEDILLA + 0x00b9: 0x00b9, # SUPERSCRIPT ONE + 0x00ba: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00bf, # INVERTED QUESTION MARK + 0x00c0: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00c7: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x00d6, # LATIN CAPITAL 
LETTER O WITH DIAERESIS + 0x00d7: 0x00d7, # MULTIPLICATION SIGN + 0x00d8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00e6, # LATIN SMALL LETTER AE + 0x00e7: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f7, # DIVISION SIGN + 0x00f8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00d0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00f0, 
# LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x00dd, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00fd, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x00de, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00fe, # LATIN SMALL LETTER S WITH CEDILLA +} \ No newline at end of file Index: koi8_r.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/koi8_r.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- koi8_r.py 8 Aug 2002 20:19:19 -0000 1.4 +++ koi8_r.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MISC/KOI8-R.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,136 +32,654 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x008b: 0x2580, # UPPER HALF BLOCK - 0x008c: 0x2584, # LOWER HALF BLOCK - 0x008d: 0x2588, # FULL BLOCK - 0x008e: 0x258c, # 
LEFT HALF BLOCK - 0x008f: 0x2590, # RIGHT HALF BLOCK - 0x0090: 0x2591, # LIGHT SHADE - 0x0091: 0x2592, # MEDIUM SHADE - 0x0092: 0x2593, # DARK SHADE - 0x0093: 0x2320, # TOP HALF INTEGRAL - 0x0094: 0x25a0, # BLACK SQUARE - 0x0095: 0x2219, # BULLET OPERATOR - 0x0096: 0x221a, # SQUARE ROOT - 0x0097: 0x2248, # ALMOST EQUAL TO - 0x0098: 0x2264, # LESS-THAN OR EQUAL TO - 0x0099: 0x2265, # GREATER-THAN OR EQUAL TO - 0x009a: 0x00a0, # NO-BREAK SPACE - 0x009b: 0x2321, # BOTTOM HALF INTEGRAL - 0x009c: 0x00b0, # DEGREE SIGN - 0x009d: 0x00b2, # SUPERSCRIPT TWO - 0x009e: 0x00b7, # MIDDLE DOT - 0x009f: 0x00f7, # DIVISION SIGN - 0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b3: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00b7: 0x2565, # BOX DRAWINGS DOWN 
DOUBLE AND HORIZONTAL SINGLE - 0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00bd: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00bf: 0x00a9, # COPYRIGHT SIGN - 0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00c1: 0x0430, # CYRILLIC SMALL LETTER A - 0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00c9: 0x0438, # CYRILLIC SMALL LETTER I - 0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00cf: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00d5: 0x0443, # CYRILLIC SMALL LETTER U - 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00db: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00dc: 0x044d, # CYRILLIC SMALL LETTER E - 0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00e0: 0x042e, 
# CYRILLIC CAPITAL LETTER YU - 0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00e3: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00e8: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 
0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x008b: 0x2580, # UPPER HALF BLOCK + 0x008c: 0x2584, # LOWER HALF BLOCK + 0x008d: 0x2588, # FULL BLOCK + 0x008e: 0x258c, # LEFT HALF BLOCK + 0x008f: 0x2590, # RIGHT HALF BLOCK + 0x0090: 0x2591, # LIGHT SHADE + 0x0091: 0x2592, # MEDIUM SHADE + 0x0092: 0x2593, # DARK SHADE + 0x0093: 0x2320, # TOP HALF INTEGRAL + 0x0094: 0x25a0, # BLACK SQUARE + 0x0095: 0x2219, # BULLET OPERATOR + 0x0096: 0x221a, # SQUARE ROOT + 0x0097: 0x2248, # ALMOST EQUAL TO + 0x0098: 0x2264, # LESS-THAN OR EQUAL TO + 0x0099: 0x2265, # GREATER-THAN OR EQUAL TO + 0x009a: 0x00a0, # NO-BREAK SPACE + 0x009b: 0x2321, # BOTTOM HALF INTEGRAL + 0x009c: 0x00b0, # DEGREE SIGN + 0x009d: 0x00b2, # SUPERSCRIPT TWO + 0x009e: 0x00b7, # MIDDLE DOT + 0x009f: 0x00f7, # DIVISION SIGN + 0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b3: 0x0401, # CYRILLIC CAPITAL 
LETTER IO + 0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00b7: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00bd: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00bf: 0x00a9, # COPYRIGHT SIGN + 0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00c1: 0x0430, # CYRILLIC SMALL LETTER A + 0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00c9: 0x0438, # CYRILLIC SMALL LETTER I + 0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00cf: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00d5: 0x0443, # CYRILLIC SMALL LETTER U + 0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00db: 
0x0448, # CYRILLIC SMALL LETTER SHA + 0x00dc: 0x044d, # CYRILLIC SMALL LETTER E + 0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00e0: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00e3: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00e8: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> 
ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u2500' # 0x0080 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u2502' # 0x0081 -> BOX DRAWINGS LIGHT VERTICAL + u'\u250c' # 0x0082 -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2510' # 0x0083 -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x0084 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2518' # 0x0085 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u251c' # 0x0086 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2524' # 0x0087 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x0088 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u2534' # 0x0089 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u253c' # 0x008a -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x008b -> UPPER HALF BLOCK + u'\u2584' # 0x008c -> LOWER HALF BLOCK + u'\u2588' # 0x008d -> FULL BLOCK + u'\u258c' # 0x008e -> LEFT HALF BLOCK + u'\u2590' # 0x008f -> RIGHT HALF BLOCK + u'\u2591' # 0x0090 -> LIGHT SHADE + u'\u2592' # 0x0091 -> MEDIUM SHADE + u'\u2593' # 0x0092 -> DARK SHADE + u'\u2320' # 0x0093 -> TOP HALF INTEGRAL + u'\u25a0' # 0x0094 -> BLACK SQUARE + u'\u2219' # 0x0095 -> BULLET OPERATOR + u'\u221a' # 0x0096 -> SQUARE ROOT + u'\u2248' # 0x0097 -> ALMOST EQUAL TO + u'\u2264' # 0x0098 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x0099 -> GREATER-THAN OR EQUAL TO + u'\xa0' # 0x009a -> NO-BREAK SPACE + u'\u2321' # 0x009b -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x009c -> DEGREE SIGN + u'\xb2' # 0x009d -> SUPERSCRIPT TWO + u'\xb7' # 0x009e -> MIDDLE 
DOT + u'\xf7' # 0x009f -> DIVISION SIGN + u'\u2550' # 0x00a0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0x00a1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0x00a2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0x00a3 -> CYRILLIC SMALL LETTER IO + u'\u2553' # 0x00a4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u2554' # 0x00a5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2555' # 0x00a6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2556' # 0x00a7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2557' # 0x00a8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0x00a9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0x00aa -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00ab -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0x00ac -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u255c' # 0x00ad -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255d' # 0x00ae -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0x00af -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00b0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0x00b1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0x00b2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0x00b3 -> CYRILLIC CAPITAL LETTER IO + u'\u2562' # 0x00b4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2563' # 0x00b5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2564' # 0x00b6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2566' # 0x00b8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0x00b9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00ba -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0x00bb -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0x00bc -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u256b' # 0x00bd -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256c' # 0x00be -> BOX DRAWINGS DOUBLE 
VERTICAL AND HORIZONTAL + u'\xa9' # 0x00bf -> COPYRIGHT SIGN + u'\u044e' # 0x00c0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0x00c1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00c2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0x00c3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0x00c4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00c5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0x00c6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0x00c7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0x00c8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0x00c9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00ca -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00cb -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00cc -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00cd -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ce -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00cf -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00d0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0x00d1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0x00d2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00d3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00d4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00d5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0x00d6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0x00d7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0x00d8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0x00d9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0x00da -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0x00db -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0x00dc -> CYRILLIC SMALL LETTER E + u'\u0449' # 0x00dd -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0x00de -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0x00df -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0x00e0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0x00e1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x00e2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0x00e3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0x00e4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x00e5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0x00e6 -> CYRILLIC CAPITAL 
LETTER EF + u'\u0413' # 0x00e7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0x00e8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0x00e9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x00ea -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x00eb -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x00ec -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x00ed -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x00ee -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x00ef -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x00f0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0x00f1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0x00f2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x00f3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x00f4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x00f5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0x00f6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0x00f7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0x00f8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0x00f9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0x00fa -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0x00fb -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0x00fc -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0x00fd -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0x00fe -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0x00ff -> CYRILLIC CAPITAL LETTER HARD SIGN +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE 
+ 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, 
# LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 
0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x009a, # NO-BREAK SPACE + 0x00a9: 0x00bf, # COPYRIGHT SIGN + 0x00b0: 0x009c, # DEGREE SIGN + 0x00b2: 0x009d, # SUPERSCRIPT TWO + 0x00b7: 0x009e, # MIDDLE DOT + 0x00f7: 0x009f, # DIVISION SIGN + 0x0401: 0x00b3, # CYRILLIC CAPITAL LETTER IO + 0x0410: 0x00e1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00e2, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00f7, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00e7, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00e4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00e5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00f6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00fa, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00e9, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00ea, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00eb, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00ec, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00ed, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00ee, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00ef, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00f0, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00f2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00f3, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00f4, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00f5, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00e6, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00e8, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00e3, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00fe, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00fb, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00fd, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x00ff, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00f9, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00f8, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00fc, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x00e0, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00f1, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00c1, # CYRILLIC SMALL LETTER A + 0x0431: 0x00c2, # CYRILLIC SMALL LETTER BE + 
0x0432: 0x00d7, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00c7, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00c4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00c5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00d6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00da, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00c9, # CYRILLIC SMALL LETTER I + 0x0439: 0x00ca, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00cb, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00cc, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00cd, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ce, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00cf, # CYRILLIC SMALL LETTER O + 0x043f: 0x00d0, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00d2, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00d3, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00d4, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00d5, # CYRILLIC SMALL LETTER U + 0x0444: 0x00c6, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00c8, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00c3, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00de, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00db, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00dd, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00df, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00d9, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00d8, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00dc, # CYRILLIC SMALL LETTER E + 0x044e: 0x00c0, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00d1, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00a3, # CYRILLIC SMALL LETTER IO + 0x2219: 0x0095, # BULLET OPERATOR + 0x221a: 0x0096, # SQUARE ROOT + 0x2248: 0x0097, # ALMOST EQUAL TO + 0x2264: 0x0098, # LESS-THAN OR EQUAL TO + 0x2265: 0x0099, # GREATER-THAN OR EQUAL TO + 0x2320: 0x0093, # TOP HALF INTEGRAL + 0x2321: 0x009b, # BOTTOM HALF INTEGRAL + 0x2500: 0x0080, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x0081, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x0082, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x0083, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x0084, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x0085, # BOX DRAWINGS LIGHT UP AND LEFT + 
0x251c: 0x0086, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x0087, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x0088, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x0089, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x008a, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00a0, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00a1, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00a2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00a4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00a5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00a6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00a7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00a8, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00a9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00aa, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00ab, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00ac, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00ad, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00ae, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00af, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00b0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00b1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00b6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00b8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00b9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00ba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00bb, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00bc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00bd, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 
0x256c: 0x00be, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x008b, # UPPER HALF BLOCK + 0x2584: 0x008c, # LOWER HALF BLOCK + 0x2588: 0x008d, # FULL BLOCK + 0x258c: 0x008e, # LEFT HALF BLOCK + 0x2590: 0x008f, # RIGHT HALF BLOCK + 0x2591: 0x0090, # LIGHT SHADE + 0x2592: 0x0091, # MEDIUM SHADE + 0x2593: 0x0092, # DARK SHADE + 0x25a0: 0x0094, # BLACK SQUARE +} \ No newline at end of file Index: mac_cyrillic.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_cyrillic.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- mac_cyrillic.py 8 Aug 2002 20:19:19 -0000 1.4 +++ mac_cyrillic.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,131 +32,650 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, 
# PILCROW SIGN - 0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x00af: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 
0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x00dc: 0x2116, # NUMERO SIGN - 0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00de: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00df: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x00a4, # CURRENCY SIGN + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a2: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x00af: 0x0453, # CYRILLIC SMALL 
LETTER GJE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b6: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x00dc: 0x2116, # NUMERO SIGN + 0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER 
IO + 0x00de: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00df: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O + 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E + 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ff: 0x20ac, # EURO SIGN }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 
0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x009b 
-> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA + u'\u2020' # 0x00a0 -> DAGGER + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\u0490' # 0x00a2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\u0406' # 0x00a7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\u0402' # 0x00ab -> CYRILLIC CAPITAL LETTER DJE + u'\u0452' # 0x00ac -> CYRILLIC SMALL LETTER DJE + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\u0403' # 0x00ae -> CYRILLIC CAPITAL LETTER GJE + u'\u0453' # 0x00af -> CYRILLIC SMALL LETTER GJE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\u0456' # 0x00b4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u0491' # 0x00b6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\u0408' # 0x00b7 -> CYRILLIC CAPITAL LETTER JE + u'\u0404' # 0x00b8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0x00b9 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0x00ba -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0x00bb -> CYRILLIC SMALL LETTER YI + u'\u0409' # 0x00bc -> CYRILLIC CAPITAL LETTER LJE + u'\u0459' # 0x00bd -> CYRILLIC SMALL LETTER LJE + u'\u040a' # 0x00be -> CYRILLIC CAPITAL LETTER NJE + u'\u045a' # 0x00bf -> CYRILLIC SMALL LETTER NJE + u'\u0458' # 0x00c0 -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0x00c1 -> CYRILLIC CAPITAL LETTER DZE + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST 
EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\u040b' # 0x00cb -> CYRILLIC CAPITAL LETTER TSHE + u'\u045b' # 0x00cc -> CYRILLIC SMALL LETTER TSHE + u'\u040c' # 0x00cd -> CYRILLIC CAPITAL LETTER KJE + u'\u045c' # 0x00ce -> CYRILLIC SMALL LETTER KJE + u'\u0455' # 0x00cf -> CYRILLIC SMALL LETTER DZE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u201e' # 0x00d7 -> DOUBLE LOW-9 QUOTATION MARK + u'\u040e' # 0x00d8 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00d9 -> CYRILLIC SMALL LETTER SHORT U + u'\u040f' # 0x00da -> CYRILLIC CAPITAL LETTER DZHE + u'\u045f' # 0x00db -> CYRILLIC SMALL LETTER DZHE + u'\u2116' # 0x00dc -> NUMERO SIGN + u'\u0401' # 0x00dd -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0x00de -> CYRILLIC SMALL LETTER IO + u'\u044f' # 0x00df -> CYRILLIC SMALL LETTER YA + u'\u0430' # 0x00e0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00e1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00e2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00e3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00e4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00e5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00e6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00e7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00e8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00e9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00ea -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00eb -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ec -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ed -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ee -> CYRILLIC SMALL 
LETTER O + u'\u043f' # 0x00ef -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0x00f0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00f1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00f2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00f3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00f4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00f5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00f6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00f7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00f8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00fa -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00fb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00fc -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00fd -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00fe -> CYRILLIC SMALL LETTER YU + u'\u20ac' # 0x00ff -> EURO SIGN +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL 
CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, 
# LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a3: 0x00a3, # POUND SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, 
# NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00f7: 0x00d6, # DIVISION SIGN + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x0401: 0x00dd, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x00ab, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x00ae, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x00b8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x00c1, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x00a7, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x00ba, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x00b7, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x00bc, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x00be, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x00cb, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x00cd, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x00d8, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x00da, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x0094, # 
CYRILLIC CAPITAL LETTER EF + 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00e0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00e1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00e2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00e3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00e4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00e5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00e7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00e8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00e9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00ea, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00eb, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00ec, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ed, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ee, # CYRILLIC SMALL LETTER O + 0x043f: 0x00ef, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00f0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00f1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00f2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00f3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00f4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00f5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00f6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00f7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00fa, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00fb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00fc, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00fd, # CYRILLIC SMALL LETTER E + 0x044e: 0x00fe, # CYRILLIC SMALL LETTER YU + 0x044f: 
0x00df, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00de, # CYRILLIC SMALL LETTER IO + 0x0452: 0x00ac, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x00af, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x00b9, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x00cf, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x00b4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x00bb, # CYRILLIC SMALL LETTER YI + 0x0458: 0x00c0, # CYRILLIC SMALL LETTER JE + 0x0459: 0x00bd, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x00bf, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x00cc, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x00ce, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x00d9, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x00db, # CYRILLIC SMALL LETTER DZHE + 0x0490: 0x00a2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0x00b6, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00d7, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x20ac: 0x00ff, # EURO SIGN + 0x2116: 0x00dc, # NUMERO SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 0x2206: 0x00c6, # INCREMENT + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO +} \ No newline at end of file Index: mac_greek.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_greek.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- mac_greek.py 8 Aug 2002 20:19:19 -0000 1.4 +++ mac_greek.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'GREEK.TXT' 
with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/GREEK.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,134 +32,652 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00b9, # SUPERSCRIPT ONE - 0x0082: 0x00b2, # SUPERSCRIPT TWO - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00b3, # SUPERSCRIPT THREE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x0385, # GREEK DIALYTIKA TONOS - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x0384, # GREEK TONOS - 0x008c: 0x00a8, # DIAERESIS - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00a3, # POUND SIGN - 0x0093: 0x2122, # TRADE MARK SIGN - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x2022, # BULLET - 0x0097: 0x00bd, # VULGAR FRACTION ONE HALF - 0x0098: 0x2030, # PER MILLE SIGN - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00a6, # BROKEN BAR - 0x009c: 0x00ad, # SOFT HYPHEN - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 
0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMBDA - 0x00a5: 0x039e, # GREEK CAPITAL LETTER XI - 0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00ac: 0x00a7, # SECTION SIGN - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00b0, # DEGREE SIGN - 0x00af: 0x0387, # GREEK ANO TELEIA - 0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00bb: 0x039c, # GREEK CAPITAL LETTER MU - 0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00c1: 0x039d, # GREEK CAPITAL LETTER NU - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00cc: 0x03a7, 
# GREEK CAPITAL LETTER CHI - 0x00cd: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2015, # HORIZONTAL BAR - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA - 0x00e3: 0x03c8, # GREEK SMALL LETTER PSI - 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00e6: 0x03c6, # GREEK SMALL LETTER PHI - 0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00e8: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00ea: 0x03be, # GREEK SMALL LETTER XI - 0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00ec: 0x03bb, # GREEK SMALL LETTER LAMBDA - 0x00ed: 0x03bc, # GREEK SMALL LETTER MU - 0x00ee: 0x03bd, # GREEK SMALL LETTER NU - 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00f0: 0x03c0, # GREEK SMALL LETTER PI - 0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00f2: 0x03c1, # GREEK SMALL LETTER RHO - 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU - 0x00f5: 0x03b8, # GREEK SMALL LETTER THETA - 0x00f6: 
0x03c9, # GREEK SMALL LETTER OMEGA - 0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00f8: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00ff: None, # UNDEFINED + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00b9, # SUPERSCRIPT ONE + 0x0082: 0x00b2, # SUPERSCRIPT TWO + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00b3, # SUPERSCRIPT THREE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x0385, # GREEK DIALYTIKA TONOS + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x0384, # GREEK TONOS + 0x008c: 0x00a8, # DIAERESIS + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00a3, # POUND SIGN + 0x0093: 0x2122, # TRADE MARK SIGN + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x2022, # BULLET + 0x0097: 0x00bd, # VULGAR FRACTION ONE HALF + 0x0098: 0x2030, # PER MILLE SIGN + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00a6, # BROKEN BAR + 0x009c: 0x20ac, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00a5: 0x039e, # GREEK CAPITAL LETTER XI + 0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00ac: 0x00a7, # SECTION SIGN + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00b0, # DEGREE SIGN + 0x00af: 0x00b7, # MIDDLE DOT + 0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00bb: 0x039c, # GREEK CAPITAL LETTER MU + 0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00c1: 0x039d, # GREEK CAPITAL LETTER NU + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00cc: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00cd: 0x0386, # GREEK CAPITAL 
LETTER ALPHA WITH TONOS + 0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2015, # HORIZONTAL BAR + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e2: 0x03b2, # GREEK SMALL LETTER BETA + 0x00e3: 0x03c8, # GREEK SMALL LETTER PSI + 0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00e6: 0x03c6, # GREEK SMALL LETTER PHI + 0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00e8: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00ea: 0x03be, # GREEK SMALL LETTER XI + 0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00ec: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00ed: 0x03bc, # GREEK SMALL LETTER MU + 0x00ee: 0x03bd, # GREEK SMALL LETTER NU + 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00f0: 0x03c0, # GREEK SMALL LETTER PI + 0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00f2: 0x03c1, # GREEK SMALL LETTER RHO + 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU + 0x00f5: 0x03b8, # GREEK SMALL LETTER THETA + 0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00f7: 0x03c2, # GREEK 
SMALL LETTER FINAL SIGMA + 0x00f8: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00ff: 0x00ad, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xb9' # 0x0081 -> SUPERSCRIPT ONE + u'\xb2' # 0x0082 -> SUPERSCRIPT TWO + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xb3' # 0x0084 -> SUPERSCRIPT THREE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0385' # 0x0087 -> GREEK DIALYTIKA TONOS + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A 
WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0384' # 0x008b -> GREEK TONOS + u'\xa8' # 0x008c -> DIAERESIS + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xa3' # 0x0092 -> POUND SIGN + u'\u2122' # 0x0093 -> TRADE MARK SIGN + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u2022' # 0x0096 -> BULLET + u'\xbd' # 0x0097 -> VULGAR FRACTION ONE HALF + u'\u2030' # 0x0098 -> PER MILLE SIGN + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xa6' # 0x009b -> BROKEN BAR + u'\u20ac' # 0x009c -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0x00a0 -> DAGGER + u'\u0393' # 0x00a1 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00a2 -> GREEK CAPITAL LETTER DELTA + u'\u0398' # 0x00a3 -> GREEK CAPITAL LETTER THETA + u'\u039b' # 0x00a4 -> GREEK CAPITAL LETTER LAMDA + u'\u039e' # 0x00a5 -> GREEK CAPITAL LETTER XI + u'\u03a0' # 0x00a6 -> GREEK CAPITAL LETTER PI + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u03a3' # 0x00aa -> GREEK CAPITAL LETTER SIGMA + u'\u03aa' # 0x00ab -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\xa7' # 0x00ac -> SECTION SIGN + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xb0' # 0x00ae -> DEGREE SIGN + u'\xb7' # 0x00af -> MIDDLE DOT + u'\u0391' # 0x00b0 -> GREEK CAPITAL LETTER ALPHA + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> 
GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\u0392' # 0x00b5 -> GREEK CAPITAL LETTER BETA + u'\u0395' # 0x00b6 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00b7 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00b8 -> GREEK CAPITAL LETTER ETA + u'\u0399' # 0x00b9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x00ba -> GREEK CAPITAL LETTER KAPPA + u'\u039c' # 0x00bb -> GREEK CAPITAL LETTER MU + u'\u03a6' # 0x00bc -> GREEK CAPITAL LETTER PHI + u'\u03ab' # 0x00bd -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03a8' # 0x00be -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00bf -> GREEK CAPITAL LETTER OMEGA + u'\u03ac' # 0x00c0 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u039d' # 0x00c1 -> GREEK CAPITAL LETTER NU + u'\xac' # 0x00c2 -> NOT SIGN + u'\u039f' # 0x00c3 -> GREEK CAPITAL LETTER OMICRON + u'\u03a1' # 0x00c4 -> GREEK CAPITAL LETTER RHO + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u03a4' # 0x00c6 -> GREEK CAPITAL LETTER TAU + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\u03a5' # 0x00cb -> GREEK CAPITAL LETTER UPSILON + u'\u03a7' # 0x00cc -> GREEK CAPITAL LETTER CHI + u'\u0386' # 0x00cd -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x00ce -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2015' # 0x00d1 -> HORIZONTAL BAR + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u0389' # 0x00d7 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00d8 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x00d9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x00da -> GREEK CAPITAL LETTER UPSILON WITH 
TONOS + u'\u03ad' # 0x00db -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00dc -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x00dd -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0x00de -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u038f' # 0x00df -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u03cd' # 0x00e0 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03b1' # 0x00e1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00e2 -> GREEK SMALL LETTER BETA + u'\u03c8' # 0x00e3 -> GREEK SMALL LETTER PSI + u'\u03b4' # 0x00e4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00e5 -> GREEK SMALL LETTER EPSILON + u'\u03c6' # 0x00e6 -> GREEK SMALL LETTER PHI + u'\u03b3' # 0x00e7 -> GREEK SMALL LETTER GAMMA + u'\u03b7' # 0x00e8 -> GREEK SMALL LETTER ETA + u'\u03b9' # 0x00e9 -> GREEK SMALL LETTER IOTA + u'\u03be' # 0x00ea -> GREEK SMALL LETTER XI + u'\u03ba' # 0x00eb -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00ec -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00ed -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00ee -> GREEK SMALL LETTER NU + u'\u03bf' # 0x00ef -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00f0 -> GREEK SMALL LETTER PI + u'\u03ce' # 0x00f1 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c1' # 0x00f2 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00f3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0x00f4 -> GREEK SMALL LETTER TAU + u'\u03b8' # 0x00f5 -> GREEK SMALL LETTER THETA + u'\u03c9' # 0x00f6 -> GREEK SMALL LETTER OMEGA + u'\u03c2' # 0x00f7 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c7' # 0x00f8 -> GREEK SMALL LETTER CHI + u'\u03c5' # 0x00f9 -> GREEK SMALL LETTER UPSILON + u'\u03b6' # 0x00fa -> GREEK SMALL LETTER ZETA + u'\u03ca' # 0x00fb -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u0390' # 0x00fd -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0x00fe -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\xad' # 0x00ff -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined +) + ### 
Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 
0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN 
SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a3: 0x0092, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a6: 0x009b, # BROKEN BAR + 0x00a7: 0x00ac, # SECTION SIGN + 0x00a8: 0x008c, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ad: 0x00ff, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00b0: 0x00ae, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b2: 0x0082, # SUPERSCRIPT TWO + 0x00b3: 0x0084, # SUPERSCRIPT THREE + 0x00b7: 0x00af, # MIDDLE DOT + 0x00b9: 0x0081, # SUPERSCRIPT ONE + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x0097, # VULGAR FRACTION ONE HALF + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 
0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x0384: 0x008b, # GREEK TONOS + 0x0385: 0x0087, # GREEK DIALYTIKA TONOS + 0x0386: 0x00cd, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00ce, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00d7, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00d8, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00d9, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00da, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00df, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00fd, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00b0, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00b5, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00a1, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00a2, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00b6, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00b7, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00b8, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00a3, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00b9, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00ba, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00a4, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00bb, # GREEK CAPITAL LETTER MU + 0x039d: 0x00c1, # GREEK CAPITAL 
LETTER NU + 0x039e: 0x00a5, # GREEK CAPITAL LETTER XI + 0x039f: 0x00c3, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00a6, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00c4, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00aa, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00c6, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00cb, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00bc, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00cc, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00be, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00bf, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00ab, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00bd, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00c0, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00db, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00dc, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00dd, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00fe, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00e1, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00e2, # GREEK SMALL LETTER BETA + 0x03b3: 0x00e7, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00e4, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00e5, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00fa, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e8, # GREEK SMALL LETTER ETA + 0x03b8: 0x00f5, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e9, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00eb, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00ec, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00ed, # GREEK SMALL LETTER MU + 0x03bd: 0x00ee, # GREEK SMALL LETTER NU + 0x03be: 0x00ea, # GREEK SMALL LETTER XI + 0x03bf: 0x00ef, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00f0, # GREEK SMALL LETTER PI + 0x03c1: 0x00f2, # GREEK SMALL LETTER RHO + 0x03c2: 0x00f7, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00f3, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00f4, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f9, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00e6, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f8, # GREEK SMALL LETTER CHI + 0x03c8: 0x00e3, # GREEK SMALL LETTER PSI 
+ 0x03c9: 0x00f6, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00fb, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00de, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00e0, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00f1, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0x00d0, # EN DASH + 0x2015: 0x00d1, # HORIZONTAL BAR + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2022: 0x0096, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x0098, # PER MILLE SIGN + 0x20ac: 0x009c, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + 0x2122: 0x0093, # TRADE MARK SIGN + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO +} \ No newline at end of file Index: mac_iceland.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_iceland.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- mac_iceland.py 8 Aug 2002 20:19:19 -0000 1.4 +++ mac_iceland.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ICELAND.TXT' with gencodec.py. 
"""#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,130 +32,648 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN 
SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK 
- 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x2044, # FRACTION SLASH - 0x00db: 0x00a4, # CURRENCY SIGN - 0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00dd: 0x00f0, # LATIN SMALL LETTER ETH - 0x00df: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 
0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # 
ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00be: 0x00e6, # LATIN SMALL LETTER AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x2044, # FRACTION SLASH + 0x00db: 0x20ac, # EURO SIGN + 0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00dd: 
0x00f0, # LATIN SMALL LETTER ETH + 0x00df: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0xf8ff, # Apple logo + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> 
CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' 
# 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL 
LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x0093 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH 
GRAVE + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xdd' # 0x00a0 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\xb4' # 0x00ab -> ACUTE ACCENT + u'\xa8' # 0x00ac -> DIAERESIS + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xc6' # 0x00ae -> LATIN CAPITAL LETTER AE + u'\xd8' # 0x00af -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0x00b7 -> N-ARY SUMMATION + u'\u220f' # 0x00b8 -> N-ARY PRODUCT + u'\u03c0' # 0x00b9 -> GREEK SMALL LETTER PI + u'\u222b' # 0x00ba -> INTEGRAL + u'\xaa' # 0x00bb -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00bc -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0x00be -> LATIN SMALL LETTER AE + u'\xf8' # 0x00bf -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0x00c0 -> INVERTED QUESTION MARK + u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT 
+ u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u25ca' # 0x00d7 -> LOZENGE + u'\xff' # 0x00d8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0x00d9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0x00da -> FRACTION SLASH + u'\u20ac' # 0x00db -> EURO SIGN + u'\xd0' # 0x00dc -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0x00dd -> LATIN SMALL LETTER ETH + u'\xde' # 0x00de -> LATIN CAPITAL LETTER THORN + u'\xfe' # 0x00df -> LATIN SMALL LETTER THORN + u'\xfd' # 0x00e0 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xb7' # 0x00e1 -> MIDDLE DOT + u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0x00e4 -> PER MILLE SIGN + u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0x00e6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0x00e8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00eb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O 
WITH ACUTE + u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0x00f0 -> Apple logo + u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0x00f5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0x00f7 -> SMALL TILDE + u'\xaf' # 0x00f8 -> MACRON + u'\u02d8' # 0x00f9 -> BREVE + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u02da' # 0x00fb -> RING ABOVE + u'\xb8' # 0x00fc -> CEDILLA + u'\u02dd' # 0x00fd -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00fe -> OGONEK + u'\u02c7' # 0x00ff -> CARON +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, 
# CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL 
LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a8: 0x00ac, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR 
+ 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00af: 0x00f8, # MACRON + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00ab, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00b7: 0x00e1, # MIDDLE DOT + 0x00b8: 0x00fc, # CEDILLA + 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00c0, # INVERTED QUESTION MARK + 0x00c0: 0x00cb, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00ae, # LATIN CAPITAL LETTER AE + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00e6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00e8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00dc, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00f1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00af, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00f3, # LATIN CAPITAL 
LETTER U WITH CIRCUMFLEX + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00a0, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00be, # LATIN SMALL LETTER AE + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00dd, # LATIN SMALL LETTER ETH + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 0x00f8: 0x00bf, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00e0, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00df, # LATIN SMALL LETTER THORN + 0x00ff: 0x00d8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00f5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE 
OE + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x0178: 0x00d9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02c7: 0x00ff, # CARON + 0x02d8: 0x00f9, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02da: 0x00fb, # RING ABOVE + 0x02db: 0x00fe, # OGONEK + 0x02dc: 0x00f7, # SMALL TILDE + 0x02dd: 0x00fd, # DOUBLE ACUTE ACCENT + 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA + 0x03c0: 0x00b9, # GREEK SMALL LETTER PI + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x00e4, # PER MILLE SIGN + 0x2044: 0x00da, # FRACTION SLASH + 0x20ac: 0x00db, # EURO SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 0x2202: 0x00b6, # PARTIAL DIFFERENTIAL + 0x2206: 0x00c6, # INCREMENT + 0x220f: 0x00b8, # N-ARY PRODUCT + 0x2211: 0x00b7, # N-ARY SUMMATION + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x222b: 0x00ba, # INTEGRAL + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0x00d7, # LOZENGE + 0xf8ff: 0x00f0, # Apple logo +} \ No newline at end of file Index: mac_roman.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_roman.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- mac_roman.py 8 Aug 2002 20:19:19 -0000 1.4 +++ mac_roman.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py. 
- -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ROMAN.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,131 +32,649 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH 
CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, 
# LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x2044, # FRACTION SLASH - 0x00db: 0x00a4, # CURRENCY SIGN - 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x00de: 0xfb01, # LATIN SMALL LIGATURE FI - 0x00df: 0xfb02, # LATIN SMALL LIGATURE FL - 0x00e0: 0x2021, # DOUBLE DAGGER - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # 
MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 
0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00be: 0x00e6, # LATIN SMALL LETTER AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN 
+ 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x2044, # FRACTION SLASH + 0x00db: 0x20ac, # EURO SIGN + 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x00de: 0xfb01, # LATIN SMALL LIGATURE FI + 0x00df: 0xfb02, # LATIN SMALL LIGATURE FL + 0x00e0: 0x2021, # DOUBLE DAGGER + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0xf8ff, # Apple logo + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 
0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' 
# 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + 
u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH 
GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x0093 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0x00a0 -> DAGGER + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\xb4' # 0x00ab -> ACUTE ACCENT + u'\xa8' # 0x00ac -> DIAERESIS + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xc6' # 0x00ae -> LATIN CAPITAL LETTER AE + u'\xd8' # 0x00af -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0x00b7 -> N-ARY SUMMATION + u'\u220f' # 0x00b8 -> N-ARY PRODUCT + u'\u03c0' # 0x00b9 -> GREEK SMALL LETTER PI + u'\u222b' # 0x00ba -> INTEGRAL + u'\xaa' # 0x00bb -> FEMININE ORDINAL INDICATOR + u'\xba' 
# 0x00bc -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0x00be -> LATIN SMALL LETTER AE + u'\xf8' # 0x00bf -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0x00c0 -> INVERTED QUESTION MARK + u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u25ca' # 0x00d7 -> LOZENGE + u'\xff' # 0x00d8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0x00d9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0x00da -> FRACTION SLASH + u'\u20ac' # 0x00db -> EURO SIGN + u'\u2039' # 0x00dc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0x00dd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufb01' # 0x00de -> LATIN SMALL LIGATURE FI + u'\ufb02' # 0x00df -> LATIN SMALL LIGATURE FL + u'\u2021' # 0x00e0 -> DOUBLE DAGGER + u'\xb7' # 0x00e1 -> MIDDLE DOT + u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0x00e4 -> PER MILLE SIGN + u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' 
# 0x00e6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0x00e8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00eb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0x00f0 -> Apple logo + u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0x00f5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0x00f7 -> SMALL TILDE + u'\xaf' # 0x00f8 -> MACRON + u'\u02d8' # 0x00f9 -> BREVE + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u02da' # 0x00fb -> RING ABOVE + u'\xb8' # 0x00fc -> CEDILLA + u'\u02dd' # 0x00fd -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00fe -> OGONEK + u'\u02c7' # 0x00ff -> CARON +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 
0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 
0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN 
SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a8: 0x00ac, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00af: 0x00f8, # MACRON + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00ab, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00b7: 0x00e1, # MIDDLE DOT + 0x00b8: 0x00fc, # CEDILLA + 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00c0, # INVERTED QUESTION MARK + 0x00c0: 0x00cb, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00ae, # LATIN CAPITAL LETTER AE + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00e6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00e8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00f1, 
# LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00af, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00f3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00be, # LATIN SMALL LETTER AE + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 0x00f8: 0x00bf, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U 
WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00d8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00f5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x0178: 0x00d9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02c7: 0x00ff, # CARON + 0x02d8: 0x00f9, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02da: 0x00fb, # RING ABOVE + 0x02db: 0x00fe, # OGONEK + 0x02dc: 0x00f7, # SMALL TILDE + 0x02dd: 0x00fd, # DOUBLE ACUTE ACCENT + 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA + 0x03c0: 0x00b9, # GREEK SMALL LETTER PI + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2021: 0x00e0, # DOUBLE DAGGER + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x00e4, # PER MILLE SIGN + 0x2039: 0x00dc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x00dd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0x00da, # FRACTION SLASH + 0x20ac: 0x00db, # EURO SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 0x2202: 0x00b6, # PARTIAL DIFFERENTIAL + 0x2206: 0x00c6, # INCREMENT + 0x220f: 0x00b8, # N-ARY PRODUCT + 0x2211: 0x00b7, # N-ARY SUMMATION + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x222b: 0x00ba, # INTEGRAL + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0x00d7, # LOZENGE + 0xf8ff: 0x00f0, # Apple logo + 0xfb01: 0x00de, # LATIN SMALL LIGATURE FI + 0xfb02: 0x00df, # LATIN SMALL LIGATURE FL +} 
\ No newline at end of file Index: mac_turkish.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_turkish.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- mac_turkish.py 8 Aug 2002 20:19:19 -0000 1.4 +++ mac_turkish.py 21 Oct 2005 13:49:12 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/TURKISH.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -37,131 +32,649 @@ decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # 
LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x00b4, # ACUTE ACCENT - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00b0: 0x221e, # INFINITY - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x00a5, # YEN SIGN - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x220f, # N-ARY PRODUCT - 0x00b9: 0x03c0, # GREEK SMALL LETTER PI - 0x00ba: 0x222b, # INTEGRAL - 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00bd: 0x2126, # OHM SIGN - 0x00be: 0x00e6, # LATIN SMALL LIGATURE AE - 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00c0: 0x00bf, # INVERTED QUESTION MARK - 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0192, # LATIN SMALL LETTER F 
WITH HOOK - 0x00c5: 0x2248, # ALMOST EQUAL TO - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00e0: 0x2021, # DOUBLE DAGGER - 0x00e1: 0x00b7, # MIDDLE DOT - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x2030, # PER MILLE SIGN - 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00ee: 0x00d3, # 
LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: None, # UNDEFINED - 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00f5: None, # UNDEFINED - 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00f7: 0x02dc, # SMALL TILDE - 0x00f8: 0x00af, # MACRON - 0x00f9: 0x02d8, # BREVE - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x02da, # RING ABOVE - 0x00fc: 0x00b8, # CEDILLA - 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00fe: 0x02db, # OGONEK - 0x00ff: 0x02c7, # CARON + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH 
ACUTE + 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x00b4, # ACUTE ACCENT + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00b0: 0x221e, # INFINITY + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x00a5, # YEN SIGN + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x220f, # N-ARY PRODUCT + 0x00b9: 0x03c0, # GREEK SMALL LETTER PI + 0x00ba: 0x222b, # INTEGRAL + 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00bd: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00be: 0x00e6, # LATIN SMALL LETTER AE + 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00c0: 0x00bf, # INVERTED QUESTION MARK + 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00c5: 0x2248, # ALMOST EQUAL TO + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 
0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00e0: 0x2021, # DOUBLE DAGGER + 0x00e1: 0x00b7, # MIDDLE DOT + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x2030, # PER MILLE SIGN + 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0xf8ff, # Apple logo + 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U 
WITH GRAVE + 0x00f5: 0xf8a0, # undefined1 + 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00f7: 0x02dc, # SMALL TILDE + 0x00f8: 0x00af, # MACRON + 0x00f9: 0x02d8, # BREVE + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x02da, # RING ABOVE + 0x00fc: 0x00b8, # CEDILLA + 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00fe: 0x02db, # OGONEK + 0x00ff: 0x02c7, # CARON }) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE + u'!' 
# 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 
0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN 
SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x0093 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0x00a0 -> DAGGER + u'\xb0' # 0x00a1 -> DEGREE SIGN + u'\xa2' # 0x00a2 -> CENT SIGN + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa7' # 0x00a4 -> SECTION SIGN + u'\u2022' # 0x00a5 -> BULLET + u'\xb6' # 0x00a6 -> PILCROW SIGN + u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0x00a8 -> REGISTERED SIGN + u'\xa9' # 0x00a9 -> COPYRIGHT SIGN + u'\u2122' # 0x00aa -> TRADE MARK SIGN + u'\xb4' # 0x00ab -> ACUTE ACCENT + u'\xa8' # 0x00ac -> DIAERESIS + u'\u2260' # 0x00ad -> NOT EQUAL TO + u'\xc6' # 0x00ae -> LATIN CAPITAL LETTER AE + u'\xd8' # 0x00af -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0x00b0 -> INFINITY + u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN + 
u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0x00b4 -> YEN SIGN + u'\xb5' # 0x00b5 -> MICRO SIGN + u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0x00b7 -> N-ARY SUMMATION + u'\u220f' # 0x00b8 -> N-ARY PRODUCT + u'\u03c0' # 0x00b9 -> GREEK SMALL LETTER PI + u'\u222b' # 0x00ba -> INTEGRAL + u'\xaa' # 0x00bb -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00bc -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0x00be -> LATIN SMALL LETTER AE + u'\xf8' # 0x00bf -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0x00c0 -> INVERTED QUESTION MARK + u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0x00c2 -> NOT SIGN + u'\u221a' # 0x00c3 -> SQUARE ROOT + u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0x00c5 -> ALMOST EQUAL TO + u'\u2206' # 0x00c6 -> INCREMENT + u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0x00ca -> NO-BREAK SPACE + u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE + u'\u2013' # 0x00d0 -> EN DASH + u'\u2014' # 0x00d1 -> EM DASH + u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0x00d6 -> DIVISION SIGN + u'\u25ca' # 0x00d7 -> LOZENGE + u'\xff' # 0x00d8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0x00d9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u011e' # 0x00da -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0x00db -> LATIN SMALL LETTER G WITH BREVE + u'\u0130' # 0x00dc -> LATIN CAPITAL LETTER I WITH 
DOT ABOVE + u'\u0131' # 0x00dd -> LATIN SMALL LETTER DOTLESS I + u'\u015e' # 0x00de -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0x00df -> LATIN SMALL LETTER S WITH CEDILLA + u'\u2021' # 0x00e0 -> DOUBLE DAGGER + u'\xb7' # 0x00e1 -> MIDDLE DOT + u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0x00e4 -> PER MILLE SIGN + u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0x00e6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0x00e8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00eb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0x00f0 -> Apple logo + u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\uf8a0' # 0x00f5 -> undefined1 + u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0x00f7 -> SMALL TILDE + u'\xaf' # 0x00f8 -> MACRON + u'\u02d8' # 0x00f9 -> BREVE + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u02da' # 0x00fb -> RING ABOVE + u'\xb8' # 0x00fc -> CEDILLA + u'\u02dd' # 0x00fd -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00fe -> OGONEK + u'\u02c7' # 0x00ff -> CARON +) + ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # 
CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 
0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL 
LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x00ca, # NO-BREAK SPACE + 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00a2, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a5: 0x00b4, # YEN SIGN + 0x00a7: 0x00a4, # SECTION SIGN + 0x00a8: 0x00ac, # DIAERESIS + 0x00a9: 0x00a9, # COPYRIGHT SIGN + 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00c2, # NOT SIGN + 0x00ae: 0x00a8, # REGISTERED SIGN + 0x00af: 0x00f8, # MACRON + 0x00b0: 0x00a1, # DEGREE SIGN + 0x00b1: 0x00b1, # PLUS-MINUS SIGN + 0x00b4: 0x00ab, # ACUTE ACCENT + 0x00b5: 0x00b5, # MICRO SIGN + 0x00b6: 0x00a6, # PILCROW SIGN + 0x00b7: 0x00e1, # MIDDLE DOT + 0x00b8: 0x00fc, # CEDILLA + 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bf: 0x00c0, # INVERTED QUESTION MARK + 0x00c0: 0x00cb, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x00ae, # LATIN CAPITAL LETTER AE + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0083, # 
LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00e6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00e8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00f1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x00af, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00f3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x00be, # LATIN SMALL LETTER AE + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE + 
0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00d6, # DIVISION SIGN + 0x00f8: 0x00bf, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00d8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00da, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00db, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x00dc, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x00dd, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE + 0x015e: 0x00de, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00df, # LATIN SMALL LETTER S WITH CEDILLA + 0x0178: 0x00d9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK + 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02c7: 0x00ff, # CARON + 0x02d8: 0x00f9, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02da: 0x00fb, # RING ABOVE + 0x02db: 0x00fe, # OGONEK + 0x02dc: 0x00f7, # SMALL TILDE + 0x02dd: 0x00fd, # DOUBLE ACUTE ACCENT + 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA + 0x03c0: 0x00b9, # GREEK SMALL LETTER PI + 0x2013: 0x00d0, # EN DASH + 0x2014: 0x00d1, # EM DASH + 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x00a0, # DAGGER + 0x2021: 0x00e0, # DOUBLE DAGGER + 0x2022: 0x00a5, # BULLET + 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS + 0x2030: 0x00e4, # PER MILLE SIGN + 0x2122: 0x00aa, # TRADE MARK SIGN + 
0x2202: 0x00b6, # PARTIAL DIFFERENTIAL + 0x2206: 0x00c6, # INCREMENT + 0x220f: 0x00b8, # N-ARY PRODUCT + 0x2211: 0x00b7, # N-ARY SUMMATION + 0x221a: 0x00c3, # SQUARE ROOT + 0x221e: 0x00b0, # INFINITY + 0x222b: 0x00ba, # INTEGRAL + 0x2248: 0x00c5, # ALMOST EQUAL TO + 0x2260: 0x00ad, # NOT EQUAL TO + 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO + 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0x00d7, # LOZENGE + 0xf8a0: 0x00f5, # undefined1 + 0xf8ff: 0x00f0, # Apple logo +} \ No newline at end of file From lemburg at users.sourceforge.net Fri Oct 21 15:58:38 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Fri, 21 Oct 2005 15:58:38 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings mac_arabic.py, NONE, 1.1 mac_centeuro.py, NONE, 1.1 mac_croatian.py, NONE, 1.1 mac_farsi.py, NONE, 1.1 mac_romanian.py, NONE, 1.1 Message-ID: <20051021135838.22EE21E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17849 Added Files: mac_arabic.py mac_centeuro.py mac_croatian.py mac_farsi.py mac_romanian.py Log Message: Add a few more Mac OS encodings. The mapping tables for these are available at ftp.unicode.org. --- NEW FILE: mac_arabic.py --- """ Python Character Mapping Codec generated from 'VENDORS/APPLE/ARABIC.TXT' with gencodec.py. 
"""#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00a0, # NO-BREAK SPACE, right-left 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH 
DIAERESIS 0x009b: 0x00f7, # DIVISION SIGN, right-left 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00a0: 0x0020, # SPACE, right-left 0x00a1: 0x0021, # EXCLAMATION MARK, right-left 0x00a2: 0x0022, # QUOTATION MARK, right-left 0x00a3: 0x0023, # NUMBER SIGN, right-left 0x00a4: 0x0024, # DOLLAR SIGN, right-left 0x00a5: 0x066a, # ARABIC PERCENT SIGN 0x00a6: 0x0026, # AMPERSAND, right-left 0x00a7: 0x0027, # APOSTROPHE, right-left 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left 0x00aa: 0x002a, # ASTERISK, right-left 0x00ab: 0x002b, # PLUS SIGN, right-left 0x00ac: 0x060c, # ARABIC COMMA 0x00ad: 0x002d, # HYPHEN-MINUS, right-left 0x00ae: 0x002e, # FULL STOP, right-left 0x00af: 0x002f, # SOLIDUS, right-left 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO, right-left (need override) 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE, right-left (need override) 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO, right-left (need override) 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE, right-left (need override) 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR, right-left (need override) 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE, right-left (need override) 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX, right-left (need override) 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE, right-left (need override) 0x00ba: 0x003a, # COLON, right-left 0x00bb: 0x061b, # ARABIC SEMICOLON 0x00bc: 0x003c, # LESS-THAN SIGN, right-left 0x00bd: 0x003d, # EQUALS SIGN, right-left 0x00be: 0x003e, # GREATER-THAN SIGN, right-left 0x00bf: 0x061f, # ARABIC QUESTION MARK 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left 0x00c1: 0x0621, # ARABIC LETTER HAMZA 0x00c2: 0x0622, # ARABIC 
LETTER ALEF WITH MADDA ABOVE 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE 0x00c7: 0x0627, # ARABIC LETTER ALEF 0x00c8: 0x0628, # ARABIC LETTER BEH 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA 0x00ca: 0x062a, # ARABIC LETTER TEH 0x00cb: 0x062b, # ARABIC LETTER THEH 0x00cc: 0x062c, # ARABIC LETTER JEEM 0x00cd: 0x062d, # ARABIC LETTER HAH 0x00ce: 0x062e, # ARABIC LETTER KHAH 0x00cf: 0x062f, # ARABIC LETTER DAL 0x00d0: 0x0630, # ARABIC LETTER THAL 0x00d1: 0x0631, # ARABIC LETTER REH 0x00d2: 0x0632, # ARABIC LETTER ZAIN 0x00d3: 0x0633, # ARABIC LETTER SEEN 0x00d4: 0x0634, # ARABIC LETTER SHEEN 0x00d5: 0x0635, # ARABIC LETTER SAD 0x00d6: 0x0636, # ARABIC LETTER DAD 0x00d7: 0x0637, # ARABIC LETTER TAH 0x00d8: 0x0638, # ARABIC LETTER ZAH 0x00d9: 0x0639, # ARABIC LETTER AIN 0x00da: 0x063a, # ARABIC LETTER GHAIN 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left 0x00dc: 0x005c, # REVERSE SOLIDUS, right-left 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left 0x00df: 0x005f, # LOW LINE, right-left 0x00e0: 0x0640, # ARABIC TATWEEL 0x00e1: 0x0641, # ARABIC LETTER FEH 0x00e2: 0x0642, # ARABIC LETTER QAF 0x00e3: 0x0643, # ARABIC LETTER KAF 0x00e4: 0x0644, # ARABIC LETTER LAM 0x00e5: 0x0645, # ARABIC LETTER MEEM 0x00e6: 0x0646, # ARABIC LETTER NOON 0x00e7: 0x0647, # ARABIC LETTER HEH 0x00e8: 0x0648, # ARABIC LETTER WAW 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA 0x00ea: 0x064a, # ARABIC LETTER YEH 0x00eb: 0x064b, # ARABIC FATHATAN 0x00ec: 0x064c, # ARABIC DAMMATAN 0x00ed: 0x064d, # ARABIC KASRATAN 0x00ee: 0x064e, # ARABIC FATHA 0x00ef: 0x064f, # ARABIC DAMMA 0x00f0: 0x0650, # ARABIC KASRA 0x00f1: 0x0651, # ARABIC SHADDA 0x00f2: 0x0652, # ARABIC SUKUN 0x00f3: 0x067e, # ARABIC LETTER PEH 0x00f4: 0x0679, # ARABIC LETTER TTEH 0x00f5: 0x0686, # ARABIC LETTER TCHEH 0x00f6: 
0x06d5, # ARABIC LETTER AE 0x00f7: 0x06a4, # ARABIC LETTER VEH 0x00f8: 0x06af, # ARABIC LETTER GAF 0x00f9: 0x0688, # ARABIC LETTER DDAL 0x00fa: 0x0691, # ARABIC LETTER RREH 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left 0x00fc: 0x007c, # VERTICAL LINE, right-left 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left 0x00fe: 0x0698, # ARABIC LETTER JEH 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> CONTROL CHARACTER u'\x01' # 0x0001 -> CONTROL CHARACTER u'\x02' # 0x0002 -> CONTROL CHARACTER u'\x03' # 0x0003 -> CONTROL CHARACTER u'\x04' # 0x0004 -> CONTROL CHARACTER u'\x05' # 0x0005 -> CONTROL CHARACTER u'\x06' # 0x0006 -> CONTROL CHARACTER u'\x07' # 0x0007 -> CONTROL CHARACTER u'\x08' # 0x0008 -> CONTROL CHARACTER u'\t' # 0x0009 -> CONTROL CHARACTER u'\n' # 0x000a -> CONTROL CHARACTER u'\x0b' # 0x000b -> CONTROL CHARACTER u'\x0c' # 0x000c -> CONTROL CHARACTER u'\r' # 0x000d -> CONTROL CHARACTER u'\x0e' # 0x000e -> CONTROL CHARACTER u'\x0f' # 0x000f -> CONTROL CHARACTER u'\x10' # 0x0010 -> CONTROL CHARACTER u'\x11' # 0x0011 -> CONTROL CHARACTER u'\x12' # 0x0012 -> CONTROL CHARACTER u'\x13' # 0x0013 -> CONTROL CHARACTER u'\x14' # 0x0014 -> CONTROL CHARACTER u'\x15' # 0x0015 -> CONTROL CHARACTER u'\x16' # 0x0016 -> CONTROL CHARACTER u'\x17' # 0x0017 -> CONTROL CHARACTER u'\x18' # 0x0018 -> CONTROL CHARACTER u'\x19' # 0x0019 -> CONTROL CHARACTER u'\x1a' # 0x001a -> CONTROL CHARACTER u'\x1b' # 0x001b -> CONTROL CHARACTER u'\x1c' # 0x001c -> CONTROL CHARACTER u'\x1d' # 0x001d -> CONTROL CHARACTER u'\x1e' # 0x001e -> CONTROL CHARACTER u'\x1f' # 0x001f -> CONTROL CHARACTER u' ' # 0x0020 -> SPACE, left-right u'!' 
# 0x0021 -> EXCLAMATION MARK, left-right u'"' # 0x0022 -> QUOTATION MARK, left-right u'#' # 0x0023 -> NUMBER SIGN, left-right u'$' # 0x0024 -> DOLLAR SIGN, left-right u'%' # 0x0025 -> PERCENT SIGN, left-right u'&' # 0x0026 -> AMPERSAND, left-right u"'" # 0x0027 -> APOSTROPHE, left-right u'(' # 0x0028 -> LEFT PARENTHESIS, left-right u')' # 0x0029 -> RIGHT PARENTHESIS, left-right u'*' # 0x002a -> ASTERISK, left-right u'+' # 0x002b -> PLUS SIGN, left-right u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR u'-' # 0x002d -> HYPHEN-MINUS, left-right u'.' # 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR u'/' # 0x002f -> SOLIDUS, left-right u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE u':' # 0x003a -> COLON, left-right u';' # 0x003b -> SEMICOLON, left-right u'<' # 0x003c -> LESS-THAN SIGN, left-right u'=' # 0x003d -> EQUALS SIGN, left-right u'>' # 0x003e -> GREATER-THAN SIGN, left-right u'?' 
# 0x003f -> QUESTION MARK, left-right u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right u'\\' # 0x005c -> REVERSE SOLIDUS, left-right u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right u'_' # 0x005f -> LOW LINE, left-right u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN 
SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET, left-right u'|' # 0x007c -> VERTICAL LINE, left-right u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> CONTROL CHARACTER u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH 
CIRCUMFLEX u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0x009b -> DIVISION SIGN, right-left u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS u' ' # 0x00a0 -> SPACE, right-left u'!' # 0x00a1 -> EXCLAMATION MARK, right-left u'"' # 0x00a2 -> QUOTATION MARK, right-left u'#' # 0x00a3 -> NUMBER SIGN, right-left u'$' # 0x00a4 -> DOLLAR SIGN, right-left u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN u'&' # 0x00a6 -> AMPERSAND, right-left u"'" # 0x00a7 -> APOSTROPHE, right-left u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left u'*' # 0x00aa -> ASTERISK, right-left u'+' # 0x00ab -> PLUS SIGN, right-left u'\u060c' # 0x00ac -> ARABIC COMMA u'-' # 0x00ad -> HYPHEN-MINUS, right-left u'.' # 0x00ae -> FULL STOP, right-left u'/' # 0x00af -> SOLIDUS, right-left u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override) u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override) u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override) u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override) u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override) u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override) u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override) u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override) u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override) u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override) u':' # 0x00ba -> COLON, right-left u'\u061b' # 0x00bb -> ARABIC SEMICOLON u'<' # 0x00bc -> LESS-THAN SIGN, right-left u'=' # 0x00bd -> EQUALS SIGN, right-left u'>' # 0x00be -> GREATER-THAN SIGN, right-left u'\u061f' # 0x00bf -> ARABIC QUESTION MARK u'\u274a' # 0x00c0 -> EIGHT 
TEARDROP-SPOKED PROPELLER ASTERISK, right-left u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF u'\u0628' # 0x00c8 -> ARABIC LETTER BEH u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA u'\u062a' # 0x00ca -> ARABIC LETTER TEH u'\u062b' # 0x00cb -> ARABIC LETTER THEH u'\u062c' # 0x00cc -> ARABIC LETTER JEEM u'\u062d' # 0x00cd -> ARABIC LETTER HAH u'\u062e' # 0x00ce -> ARABIC LETTER KHAH u'\u062f' # 0x00cf -> ARABIC LETTER DAL u'\u0630' # 0x00d0 -> ARABIC LETTER THAL u'\u0631' # 0x00d1 -> ARABIC LETTER REH u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN u'\u0635' # 0x00d5 -> ARABIC LETTER SAD u'\u0636' # 0x00d6 -> ARABIC LETTER DAD u'\u0637' # 0x00d7 -> ARABIC LETTER TAH u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH u'\u0639' # 0x00d9 -> ARABIC LETTER AIN u'\u063a' # 0x00da -> ARABIC LETTER GHAIN u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left u'_' # 0x00df -> LOW LINE, right-left u'\u0640' # 0x00e0 -> ARABIC TATWEEL u'\u0641' # 0x00e1 -> ARABIC LETTER FEH u'\u0642' # 0x00e2 -> ARABIC LETTER QAF u'\u0643' # 0x00e3 -> ARABIC LETTER KAF u'\u0644' # 0x00e4 -> ARABIC LETTER LAM u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM u'\u0646' # 0x00e6 -> ARABIC LETTER NOON u'\u0647' # 0x00e7 -> ARABIC LETTER HEH u'\u0648' # 0x00e8 -> ARABIC LETTER WAW u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA u'\u064a' # 0x00ea -> ARABIC LETTER YEH u'\u064b' # 0x00eb -> ARABIC FATHATAN u'\u064c' # 0x00ec -> ARABIC DAMMATAN u'\u064d' # 0x00ed -> ARABIC KASRATAN u'\u064e' # 
0x00ee -> ARABIC FATHA u'\u064f' # 0x00ef -> ARABIC DAMMA u'\u0650' # 0x00f0 -> ARABIC KASRA u'\u0651' # 0x00f1 -> ARABIC SHADDA u'\u0652' # 0x00f2 -> ARABIC SUKUN u'\u067e' # 0x00f3 -> ARABIC LETTER PEH u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH u'\u06d5' # 0x00f6 -> ARABIC LETTER AE u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH u'\u06af' # 0x00f8 -> ARABIC LETTER GAF u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL u'\u0691' # 0x00fa -> ARABIC LETTER RREH u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left u'|' # 0x00fc -> VERTICAL LINE, right-left u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left u'\u0698' # 0x00fe -> ARABIC LETTER JEH u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # CONTROL CHARACTER 0x0001: 0x0001, # CONTROL CHARACTER 0x0002: 0x0002, # CONTROL CHARACTER 0x0003: 0x0003, # CONTROL CHARACTER 0x0004: 0x0004, # CONTROL CHARACTER 0x0005: 0x0005, # CONTROL CHARACTER 0x0006: 0x0006, # CONTROL CHARACTER 0x0007: 0x0007, # CONTROL CHARACTER 0x0008: 0x0008, # CONTROL CHARACTER 0x0009: 0x0009, # CONTROL CHARACTER 0x000a: 0x000a, # CONTROL CHARACTER 0x000b: 0x000b, # CONTROL CHARACTER 0x000c: 0x000c, # CONTROL CHARACTER 0x000d: 0x000d, # CONTROL CHARACTER 0x000e: 0x000e, # CONTROL CHARACTER 0x000f: 0x000f, # CONTROL CHARACTER 0x0010: 0x0010, # CONTROL CHARACTER 0x0011: 0x0011, # CONTROL CHARACTER 0x0012: 0x0012, # CONTROL CHARACTER 0x0013: 0x0013, # CONTROL CHARACTER 0x0014: 0x0014, # CONTROL CHARACTER 0x0015: 0x0015, # CONTROL CHARACTER 0x0016: 0x0016, # CONTROL CHARACTER 0x0017: 0x0017, # CONTROL CHARACTER 0x0018: 0x0018, # CONTROL CHARACTER 0x0019: 0x0019, # CONTROL CHARACTER 0x001a: 0x001a, # CONTROL CHARACTER 0x001b: 0x001b, # CONTROL CHARACTER 0x001c: 0x001c, # CONTROL CHARACTER 0x001d: 0x001d, # CONTROL CHARACTER 0x001e: 0x001e, # CONTROL CHARACTER 0x001f: 0x001f, # CONTROL CHARACTER 0x0020: 0x0020, # SPACE, left-right 0x0020: 0x00a0, # SPACE, right-left 0x0021: 0x0021, # 
EXCLAMATION MARK, left-right 0x0021: 0x00a1, # EXCLAMATION MARK, right-left 0x0022: 0x0022, # QUOTATION MARK, left-right 0x0022: 0x00a2, # QUOTATION MARK, right-left 0x0023: 0x0023, # NUMBER SIGN, left-right 0x0023: 0x00a3, # NUMBER SIGN, right-left 0x0024: 0x0024, # DOLLAR SIGN, left-right 0x0024: 0x00a4, # DOLLAR SIGN, right-left 0x0025: 0x0025, # PERCENT SIGN, left-right 0x0026: 0x0026, # AMPERSAND, left-right 0x0026: 0x00a6, # AMPERSAND, right-left 0x0027: 0x0027, # APOSTROPHE, left-right 0x0027: 0x00a7, # APOSTROPHE, right-left 0x0028: 0x0028, # LEFT PARENTHESIS, left-right 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left 0x0029: 0x0029, # RIGHT PARENTHESIS, left-right 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left 0x002a: 0x002a, # ASTERISK, left-right 0x002a: 0x00aa, # ASTERISK, right-left 0x002b: 0x002b, # PLUS SIGN, left-right 0x002b: 0x00ab, # PLUS SIGN, right-left 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR 0x002d: 0x002d, # HYPHEN-MINUS, left-right 0x002d: 0x00ad, # HYPHEN-MINUS, right-left 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR 0x002e: 0x00ae, # FULL STOP, right-left 0x002f: 0x002f, # SOLIDUS, left-right 0x002f: 0x00af, # SOLIDUS, right-left 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC 
DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE 0x003a: 0x003a, # COLON, left-right 0x003a: 0x00ba, # COLON, right-left 0x003b: 0x003b, # SEMICOLON, left-right 0x003c: 0x003c, # LESS-THAN SIGN, left-right 0x003c: 0x00bc, # LESS-THAN SIGN, right-left 0x003d: 0x003d, # EQUALS SIGN, left-right 0x003d: 0x00bd, # EQUALS SIGN, right-left 0x003e: 0x003e, # GREATER-THAN SIGN, left-right 0x003e: 0x00be, # GREATER-THAN SIGN, right-left 0x003f: 0x003f, # QUESTION MARK, left-right 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left 0x005c: 0x005c, # REVERSE SOLIDUS, left-right 0x005c: 0x00dc, 
# REVERSE SOLIDUS, right-left 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left 0x005f: 0x005f, # LOW LINE, left-right 0x005f: 0x00df, # LOW LINE, right-left 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left 0x007c: 0x007c, # VERTICAL LINE, left-right 0x007c: 0x00fc, # VERTICAL LINE, right-left 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # CONTROL CHARACTER 0x00a0: 0x0081, # NO-BREAK SPACE, right-left 0x00ab: 0x008c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA 
0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x009b, # DIVISION SIGN, right-left 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS 0x060c: 0x00ac, # ARABIC COMMA 0x061b: 0x00bb, # ARABIC SEMICOLON 0x061f: 0x00bf, # ARABIC QUESTION MARK 0x0621: 0x00c1, # ARABIC LETTER HAMZA 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE 0x0627: 0x00c7, # ARABIC LETTER ALEF 0x0628: 0x00c8, # ARABIC LETTER BEH 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA 0x062a: 0x00ca, # ARABIC LETTER TEH 0x062b: 0x00cb, # ARABIC LETTER THEH 0x062c: 0x00cc, # ARABIC LETTER JEEM 0x062d: 0x00cd, # ARABIC LETTER HAH 0x062e: 0x00ce, # 
ARABIC LETTER KHAH 0x062f: 0x00cf, # ARABIC LETTER DAL 0x0630: 0x00d0, # ARABIC LETTER THAL 0x0631: 0x00d1, # ARABIC LETTER REH 0x0632: 0x00d2, # ARABIC LETTER ZAIN 0x0633: 0x00d3, # ARABIC LETTER SEEN 0x0634: 0x00d4, # ARABIC LETTER SHEEN 0x0635: 0x00d5, # ARABIC LETTER SAD 0x0636: 0x00d6, # ARABIC LETTER DAD 0x0637: 0x00d7, # ARABIC LETTER TAH 0x0638: 0x00d8, # ARABIC LETTER ZAH 0x0639: 0x00d9, # ARABIC LETTER AIN 0x063a: 0x00da, # ARABIC LETTER GHAIN 0x0640: 0x00e0, # ARABIC TATWEEL 0x0641: 0x00e1, # ARABIC LETTER FEH 0x0642: 0x00e2, # ARABIC LETTER QAF 0x0643: 0x00e3, # ARABIC LETTER KAF 0x0644: 0x00e4, # ARABIC LETTER LAM 0x0645: 0x00e5, # ARABIC LETTER MEEM 0x0646: 0x00e6, # ARABIC LETTER NOON 0x0647: 0x00e7, # ARABIC LETTER HEH 0x0648: 0x00e8, # ARABIC LETTER WAW 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA 0x064a: 0x00ea, # ARABIC LETTER YEH 0x064b: 0x00eb, # ARABIC FATHATAN 0x064c: 0x00ec, # ARABIC DAMMATAN 0x064d: 0x00ed, # ARABIC KASRATAN 0x064e: 0x00ee, # ARABIC FATHA 0x064f: 0x00ef, # ARABIC DAMMA 0x0650: 0x00f0, # ARABIC KASRA 0x0651: 0x00f1, # ARABIC SHADDA 0x0652: 0x00f2, # ARABIC SUKUN 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO, right-left (need override) 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE, right-left (need override) 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO, right-left (need override) 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE, right-left (need override) 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR, right-left (need override) 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE, right-left (need override) 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX, right-left (need override) 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE, right-left (need override) 0x066a: 0x00a5, # ARABIC PERCENT SIGN 0x0679: 0x00f4, # ARABIC LETTER TTEH 0x067e: 0x00f3, # ARABIC LETTER PEH 0x0686: 0x00f5, # ARABIC LETTER TCHEH 0x0688: 0x00f9, # ARABIC 
LETTER DDAL 0x0691: 0x00fa, # ARABIC LETTER RREH 0x0698: 0x00fe, # ARABIC LETTER JEH 0x06a4: 0x00f7, # ARABIC LETTER VEH 0x06af: 0x00f8, # ARABIC LETTER GAF 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE 0x06d5: 0x00f6, # ARABIC LETTER AE 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left } --- NEW FILE: mac_centeuro.py --- """ Python Character Mapping Codec generated from 'VENDORS/APPLE/CENTEURO.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE 0x0091: 0x010e, # 
LATIN CAPITAL LETTER D WITH CARON 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00a0: 0x2020, # DAGGER 0x00a1: 0x00b0, # DEGREE SIGN 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK 0x00a4: 0x00a7, # SECTION SIGN 0x00a5: 0x2022, # BULLET 0x00a6: 0x00b6, # PILCROW SIGN 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S 0x00a8: 0x00ae, # REGISTERED SIGN 0x00aa: 0x2122, # TRADE MARK SIGN 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK 0x00ac: 0x00a8, # DIAERESIS 0x00ad: 0x2260, # NOT EQUAL TO 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL 0x00b7: 0x2211, # N-ARY SUMMATION 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE 0x00be: 0x013a, # LATIN SMALL LETTER 
L WITH ACUTE 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE 0x00c2: 0x00ac, # NOT SIGN 0x00c3: 0x221a, # SQUARE ROOT 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON 0x00c6: 0x2206, # INCREMENT 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS 0x00ca: 0x00a0, # NO-BREAK SPACE 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON 0x00d0: 0x2013, # EN DASH 0x00d1: 0x2014, # EM DASH 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK 0x00d6: 0x00f7, # DIVISION SIGN 0x00d7: 0x25ca, # LOZENGE 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON 
0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE 0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA 0x00ff: 0x02c7, # CARON }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> CONTROL CHARACTER u'\x01' # 0x0001 -> CONTROL CHARACTER u'\x02' # 0x0002 -> CONTROL CHARACTER u'\x03' # 0x0003 -> CONTROL CHARACTER u'\x04' # 0x0004 -> CONTROL CHARACTER u'\x05' # 0x0005 -> CONTROL CHARACTER u'\x06' # 0x0006 -> CONTROL CHARACTER u'\x07' # 0x0007 -> CONTROL CHARACTER u'\x08' # 0x0008 -> CONTROL CHARACTER u'\t' # 0x0009 -> CONTROL CHARACTER u'\n' # 0x000a -> CONTROL CHARACTER u'\x0b' # 0x000b -> CONTROL CHARACTER u'\x0c' # 0x000c -> CONTROL CHARACTER u'\r' # 0x000d -> CONTROL CHARACTER u'\x0e' # 0x000e -> CONTROL CHARACTER u'\x0f' # 0x000f -> CONTROL CHARACTER u'\x10' # 0x0010 -> CONTROL CHARACTER u'\x11' # 0x0011 -> CONTROL CHARACTER u'\x12' # 0x0012 -> CONTROL CHARACTER u'\x13' # 0x0013 -> CONTROL CHARACTER u'\x14' 
# 0x0014 -> CONTROL CHARACTER u'\x15' # 0x0015 -> CONTROL CHARACTER u'\x16' # 0x0016 -> CONTROL CHARACTER u'\x17' # 0x0017 -> CONTROL CHARACTER u'\x18' # 0x0018 -> CONTROL CHARACTER u'\x19' # 0x0019 -> CONTROL CHARACTER u'\x1a' # 0x001a -> CONTROL CHARACTER u'\x1b' # 0x001b -> CONTROL CHARACTER u'\x1c' # 0x001c -> CONTROL CHARACTER u'\x1d' # 0x001d -> CONTROL CHARACTER u'\x1e' # 0x001e -> CONTROL CHARACTER u'\x1f' # 0x001f -> CONTROL CHARACTER u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' 
# 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> 
LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> CONTROL CHARACTER u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\u0100' # 0x0081 -> LATIN CAPITAL LETTER A WITH MACRON u'\u0101' # 0x0082 -> LATIN SMALL LETTER A WITH MACRON u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE u'\u0104' # 0x0084 -> LATIN CAPITAL LETTER A WITH OGONEK u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE u'\u0105' # 0x0088 -> LATIN SMALL LETTER A WITH OGONEK u'\u010c' # 0x0089 -> LATIN CAPITAL LETTER C WITH CARON u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS u'\u010d' # 0x008b -> LATIN SMALL LETTER C WITH CARON u'\u0106' # 0x008c -> LATIN CAPITAL LETTER C WITH ACUTE u'\u0107' # 0x008d -> LATIN SMALL LETTER C WITH ACUTE u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE u'\u0179' # 0x008f -> LATIN CAPITAL LETTER Z WITH ACUTE u'\u017a' # 0x0090 -> LATIN SMALL LETTER Z WITH ACUTE u'\u010e' # 0x0091 -> LATIN CAPITAL LETTER D WITH CARON u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE u'\u010f' # 0x0093 -> LATIN SMALL LETTER D WITH CARON u'\u0112' # 0x0094 -> LATIN CAPITAL LETTER E WITH MACRON u'\u0113' # 0x0095 -> LATIN SMALL LETTER E WITH MACRON u'\u0116' # 0x0096 -> LATIN CAPITAL LETTER E WITH DOT ABOVE u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE u'\u0117' # 0x0098 -> LATIN SMALL LETTER E WITH DOT ABOVE u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH 
TILDE u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE u'\u011a' # 0x009d -> LATIN CAPITAL LETTER E WITH CARON u'\u011b' # 0x009e -> LATIN SMALL LETTER E WITH CARON u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS u'\u2020' # 0x00a0 -> DAGGER u'\xb0' # 0x00a1 -> DEGREE SIGN u'\u0118' # 0x00a2 -> LATIN CAPITAL LETTER E WITH OGONEK u'\xa3' # 0x00a3 -> POUND SIGN u'\xa7' # 0x00a4 -> SECTION SIGN u'\u2022' # 0x00a5 -> BULLET u'\xb6' # 0x00a6 -> PILCROW SIGN u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S u'\xae' # 0x00a8 -> REGISTERED SIGN u'\xa9' # 0x00a9 -> COPYRIGHT SIGN u'\u2122' # 0x00aa -> TRADE MARK SIGN u'\u0119' # 0x00ab -> LATIN SMALL LETTER E WITH OGONEK u'\xa8' # 0x00ac -> DIAERESIS u'\u2260' # 0x00ad -> NOT EQUAL TO u'\u0123' # 0x00ae -> LATIN SMALL LETTER G WITH CEDILLA u'\u012e' # 0x00af -> LATIN CAPITAL LETTER I WITH OGONEK u'\u012f' # 0x00b0 -> LATIN SMALL LETTER I WITH OGONEK u'\u012a' # 0x00b1 -> LATIN CAPITAL LETTER I WITH MACRON u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO u'\u012b' # 0x00b4 -> LATIN SMALL LETTER I WITH MACRON u'\u0136' # 0x00b5 -> LATIN CAPITAL LETTER K WITH CEDILLA u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL u'\u2211' # 0x00b7 -> N-ARY SUMMATION u'\u0142' # 0x00b8 -> LATIN SMALL LETTER L WITH STROKE u'\u013b' # 0x00b9 -> LATIN CAPITAL LETTER L WITH CEDILLA u'\u013c' # 0x00ba -> LATIN SMALL LETTER L WITH CEDILLA u'\u013d' # 0x00bb -> LATIN CAPITAL LETTER L WITH CARON u'\u013e' # 0x00bc -> LATIN SMALL LETTER L WITH CARON u'\u0139' # 0x00bd -> LATIN CAPITAL LETTER L WITH ACUTE u'\u013a' # 0x00be -> LATIN SMALL LETTER L WITH ACUTE u'\u0145' # 0x00bf -> LATIN CAPITAL LETTER N WITH CEDILLA u'\u0146' # 0x00c0 -> LATIN SMALL LETTER N WITH CEDILLA u'\u0143' # 0x00c1 -> LATIN CAPITAL LETTER N WITH ACUTE u'\xac' # 0x00c2 -> NOT SIGN u'\u221a' # 0x00c3 -> SQUARE ROOT u'\u0144' # 0x00c4 -> LATIN SMALL LETTER N WITH ACUTE u'\u0147' # 0x00c5 -> LATIN CAPITAL LETTER N WITH CARON u'\u2206' # 
0x00c6 -> INCREMENT u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0x00ca -> NO-BREAK SPACE u'\u0148' # 0x00cb -> LATIN SMALL LETTER N WITH CARON u'\u0150' # 0x00cc -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE u'\u0151' # 0x00ce -> LATIN SMALL LETTER O WITH DOUBLE ACUTE u'\u014c' # 0x00cf -> LATIN CAPITAL LETTER O WITH MACRON u'\u2013' # 0x00d0 -> EN DASH u'\u2014' # 0x00d1 -> EM DASH u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0x00d6 -> DIVISION SIGN u'\u25ca' # 0x00d7 -> LOZENGE u'\u014d' # 0x00d8 -> LATIN SMALL LETTER O WITH MACRON u'\u0154' # 0x00d9 -> LATIN CAPITAL LETTER R WITH ACUTE u'\u0155' # 0x00da -> LATIN SMALL LETTER R WITH ACUTE u'\u0158' # 0x00db -> LATIN CAPITAL LETTER R WITH CARON u'\u2039' # 0x00dc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u203a' # 0x00dd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\u0159' # 0x00de -> LATIN SMALL LETTER R WITH CARON u'\u0156' # 0x00df -> LATIN CAPITAL LETTER R WITH CEDILLA u'\u0157' # 0x00e0 -> LATIN SMALL LETTER R WITH CEDILLA u'\u0160' # 0x00e1 -> LATIN CAPITAL LETTER S WITH CARON u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK u'\u0161' # 0x00e4 -> LATIN SMALL LETTER S WITH CARON u'\u015a' # 0x00e5 -> LATIN CAPITAL LETTER S WITH ACUTE u'\u015b' # 0x00e6 -> LATIN SMALL LETTER S WITH ACUTE u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE u'\u0164' # 0x00e8 -> LATIN CAPITAL LETTER T WITH CARON u'\u0165' # 0x00e9 -> LATIN SMALL LETTER T WITH CARON u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE u'\u017d' # 0x00eb -> LATIN CAPITAL LETTER Z WITH CARON u'\u017e' # 0x00ec -> LATIN SMALL LETTER Z WITH CARON 
u'\u016a' # 0x00ed -> LATIN CAPITAL LETTER U WITH MACRON u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\u016b' # 0x00f0 -> LATIN SMALL LETTER U WITH MACRON u'\u016e' # 0x00f1 -> LATIN CAPITAL LETTER U WITH RING ABOVE u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE u'\u016f' # 0x00f3 -> LATIN SMALL LETTER U WITH RING ABOVE u'\u0170' # 0x00f4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE u'\u0171' # 0x00f5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE u'\u0172' # 0x00f6 -> LATIN CAPITAL LETTER U WITH OGONEK u'\u0173' # 0x00f7 -> LATIN SMALL LETTER U WITH OGONEK u'\xdd' # 0x00f8 -> LATIN CAPITAL LETTER Y WITH ACUTE u'\xfd' # 0x00f9 -> LATIN SMALL LETTER Y WITH ACUTE u'\u0137' # 0x00fa -> LATIN SMALL LETTER K WITH CEDILLA u'\u017b' # 0x00fb -> LATIN CAPITAL LETTER Z WITH DOT ABOVE u'\u0141' # 0x00fc -> LATIN CAPITAL LETTER L WITH STROKE u'\u017c' # 0x00fd -> LATIN SMALL LETTER Z WITH DOT ABOVE u'\u0122' # 0x00fe -> LATIN CAPITAL LETTER G WITH CEDILLA u'\u02c7' # 0x00ff -> CARON ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # CONTROL CHARACTER 0x0001: 0x0001, # CONTROL CHARACTER 0x0002: 0x0002, # CONTROL CHARACTER 0x0003: 0x0003, # CONTROL CHARACTER 0x0004: 0x0004, # CONTROL CHARACTER 0x0005: 0x0005, # CONTROL CHARACTER 0x0006: 0x0006, # CONTROL CHARACTER 0x0007: 0x0007, # CONTROL CHARACTER 0x0008: 0x0008, # CONTROL CHARACTER 0x0009: 0x0009, # CONTROL CHARACTER 0x000a: 0x000a, # CONTROL CHARACTER 0x000b: 0x000b, # CONTROL CHARACTER 0x000c: 0x000c, # CONTROL CHARACTER 0x000d: 0x000d, # CONTROL CHARACTER 0x000e: 0x000e, # CONTROL CHARACTER 0x000f: 0x000f, # CONTROL CHARACTER 0x0010: 0x0010, # CONTROL CHARACTER 0x0011: 0x0011, # CONTROL CHARACTER 0x0012: 0x0012, # CONTROL CHARACTER 0x0013: 0x0013, # CONTROL CHARACTER 0x0014: 0x0014, # CONTROL CHARACTER 0x0015: 0x0015, # CONTROL CHARACTER 0x0016: 0x0016, # CONTROL CHARACTER 0x0017: 0x0017, # CONTROL CHARACTER 0x0018: 0x0018, # CONTROL CHARACTER 
0x0019: 0x0019, # CONTROL CHARACTER 0x001a: 0x001a, # CONTROL CHARACTER 0x001b: 0x001b, # CONTROL CHARACTER 0x001c: 0x001c, # CONTROL CHARACTER 0x001d: 0x001d, # CONTROL CHARACTER 0x001e: 0x001e, # CONTROL CHARACTER 0x001f: 0x001f, # CONTROL CHARACTER 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN 
CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # CONTROL CHARACTER 0x00a0: 0x00ca, # NO-BREAK SPACE 0x00a3: 0x00a3, # POUND SIGN 0x00a7: 0x00a4, # SECTION SIGN 0x00a8: 0x00ac, # DIAERESIS 0x00a9: 0x00a9, # COPYRIGHT SIGN 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00c2, # NOT SIGN 0x00ae: 0x00a8, # REGISTERED SIGN 0x00b0: 0x00a1, # DEGREE SIGN 
0x00b6: 0x00a6, # PILCROW SIGN 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00dd: 0x00f8, # LATIN CAPITAL LETTER Y WITH ACUTE 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x00d6, # DIVISION SIGN 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS 0x00fd: 0x00f9, # LATIN SMALL LETTER Y WITH ACUTE 0x0100: 0x0081, # LATIN CAPITAL LETTER A WITH MACRON 0x0101: 0x0082, # LATIN SMALL LETTER A WITH MACRON 0x0104: 0x0084, # LATIN CAPITAL LETTER A WITH OGONEK 0x0105: 0x0088, # LATIN SMALL LETTER A WITH OGONEK 0x0106: 0x008c, # LATIN CAPITAL LETTER C WITH ACUTE 0x0107: 0x008d, # LATIN SMALL LETTER C WITH ACUTE 0x010c: 0x0089, # LATIN CAPITAL LETTER C WITH CARON 0x010d: 0x008b, # LATIN SMALL LETTER C WITH CARON 0x010e: 0x0091, # LATIN CAPITAL LETTER D WITH CARON 0x010f: 0x0093, # LATIN SMALL LETTER D WITH CARON 0x0112: 0x0094, # LATIN CAPITAL LETTER E WITH MACRON 0x0113: 0x0095, # LATIN SMALL LETTER E WITH MACRON 0x0116: 0x0096, # LATIN CAPITAL LETTER E WITH DOT ABOVE 
0x0117: 0x0098, # LATIN SMALL LETTER E WITH DOT ABOVE 0x0118: 0x00a2, # LATIN CAPITAL LETTER E WITH OGONEK 0x0119: 0x00ab, # LATIN SMALL LETTER E WITH OGONEK 0x011a: 0x009d, # LATIN CAPITAL LETTER E WITH CARON 0x011b: 0x009e, # LATIN SMALL LETTER E WITH CARON 0x0122: 0x00fe, # LATIN CAPITAL LETTER G WITH CEDILLA 0x0123: 0x00ae, # LATIN SMALL LETTER G WITH CEDILLA 0x012a: 0x00b1, # LATIN CAPITAL LETTER I WITH MACRON 0x012b: 0x00b4, # LATIN SMALL LETTER I WITH MACRON 0x012e: 0x00af, # LATIN CAPITAL LETTER I WITH OGONEK 0x012f: 0x00b0, # LATIN SMALL LETTER I WITH OGONEK 0x0136: 0x00b5, # LATIN CAPITAL LETTER K WITH CEDILLA 0x0137: 0x00fa, # LATIN SMALL LETTER K WITH CEDILLA 0x0139: 0x00bd, # LATIN CAPITAL LETTER L WITH ACUTE 0x013a: 0x00be, # LATIN SMALL LETTER L WITH ACUTE 0x013b: 0x00b9, # LATIN CAPITAL LETTER L WITH CEDILLA 0x013c: 0x00ba, # LATIN SMALL LETTER L WITH CEDILLA 0x013d: 0x00bb, # LATIN CAPITAL LETTER L WITH CARON 0x013e: 0x00bc, # LATIN SMALL LETTER L WITH CARON 0x0141: 0x00fc, # LATIN CAPITAL LETTER L WITH STROKE 0x0142: 0x00b8, # LATIN SMALL LETTER L WITH STROKE 0x0143: 0x00c1, # LATIN CAPITAL LETTER N WITH ACUTE 0x0144: 0x00c4, # LATIN SMALL LETTER N WITH ACUTE 0x0145: 0x00bf, # LATIN CAPITAL LETTER N WITH CEDILLA 0x0146: 0x00c0, # LATIN SMALL LETTER N WITH CEDILLA 0x0147: 0x00c5, # LATIN CAPITAL LETTER N WITH CARON 0x0148: 0x00cb, # LATIN SMALL LETTER N WITH CARON 0x014c: 0x00cf, # LATIN CAPITAL LETTER O WITH MACRON 0x014d: 0x00d8, # LATIN SMALL LETTER O WITH MACRON 0x0150: 0x00cc, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0x0151: 0x00ce, # LATIN SMALL LETTER O WITH DOUBLE ACUTE 0x0154: 0x00d9, # LATIN CAPITAL LETTER R WITH ACUTE 0x0155: 0x00da, # LATIN SMALL LETTER R WITH ACUTE 0x0156: 0x00df, # LATIN CAPITAL LETTER R WITH CEDILLA 0x0157: 0x00e0, # LATIN SMALL LETTER R WITH CEDILLA 0x0158: 0x00db, # LATIN CAPITAL LETTER R WITH CARON 0x0159: 0x00de, # LATIN SMALL LETTER R WITH CARON 0x015a: 0x00e5, # LATIN CAPITAL LETTER S WITH ACUTE 0x015b: 
0x00e6, # LATIN SMALL LETTER S WITH ACUTE 0x0160: 0x00e1, # LATIN CAPITAL LETTER S WITH CARON 0x0161: 0x00e4, # LATIN SMALL LETTER S WITH CARON 0x0164: 0x00e8, # LATIN CAPITAL LETTER T WITH CARON 0x0165: 0x00e9, # LATIN SMALL LETTER T WITH CARON 0x016a: 0x00ed, # LATIN CAPITAL LETTER U WITH MACRON 0x016b: 0x00f0, # LATIN SMALL LETTER U WITH MACRON 0x016e: 0x00f1, # LATIN CAPITAL LETTER U WITH RING ABOVE 0x016f: 0x00f3, # LATIN SMALL LETTER U WITH RING ABOVE 0x0170: 0x00f4, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0x0171: 0x00f5, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x0172: 0x00f6, # LATIN CAPITAL LETTER U WITH OGONEK 0x0173: 0x00f7, # LATIN SMALL LETTER U WITH OGONEK 0x0179: 0x008f, # LATIN CAPITAL LETTER Z WITH ACUTE 0x017a: 0x0090, # LATIN SMALL LETTER Z WITH ACUTE 0x017b: 0x00fb, # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x017c: 0x00fd, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x017d: 0x00eb, # LATIN CAPITAL LETTER Z WITH CARON 0x017e: 0x00ec, # LATIN SMALL LETTER Z WITH CARON 0x02c7: 0x00ff, # CARON 0x2013: 0x00d0, # EN DASH 0x2014: 0x00d1, # EM DASH 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x00a0, # DAGGER 0x2022: 0x00a5, # BULLET 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS 0x2039: 0x00dc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x203a: 0x00dd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x2122: 0x00aa, # TRADE MARK SIGN 0x2202: 0x00b6, # PARTIAL DIFFERENTIAL 0x2206: 0x00c6, # INCREMENT 0x2211: 0x00b7, # N-ARY SUMMATION 0x221a: 0x00c3, # SQUARE ROOT 0x2260: 0x00ad, # NOT EQUAL TO 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO 0x25ca: 0x00d7, # LOZENGE } --- NEW FILE: mac_croatian.py --- """ Python Character Mapping Codec generated from 'VENDORS/APPLE/CROATIAN.TXT' with gencodec.py. 
"""#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x009b: 
0x00f5, # LATIN SMALL LETTER O WITH TILDE 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00a0: 0x2020, # DAGGER 0x00a1: 0x00b0, # DEGREE SIGN 0x00a4: 0x00a7, # SECTION SIGN 0x00a5: 0x2022, # BULLET 0x00a6: 0x00b6, # PILCROW SIGN 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S 0x00a8: 0x00ae, # REGISTERED SIGN 0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00aa: 0x2122, # TRADE MARK SIGN 0x00ab: 0x00b4, # ACUTE ACCENT 0x00ac: 0x00a8, # DIAERESIS 0x00ad: 0x2260, # NOT EQUAL TO 0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON 0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE 0x00b0: 0x221e, # INFINITY 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO 0x00b4: 0x2206, # INCREMENT 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL 0x00b7: 0x2211, # N-ARY SUMMATION 0x00b8: 0x220f, # N-ARY PRODUCT 0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON 0x00ba: 0x222b, # INTEGRAL 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00bd: 0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE 0x00c0: 0x00bf, # INVERTED QUESTION MARK 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK 0x00c2: 0x00ac, # NOT SIGN 0x00c3: 0x221a, # SQUARE ROOT 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00c5: 0x2248, # ALMOST EQUAL TO 0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS 0x00ca: 0x00a0, # NO-BREAK SPACE 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE 0x00cf: 0x0153, # LATIN SMALL 
LIGATURE OE 0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE 0x00d1: 0x2014, # EM DASH 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK 0x00d6: 0x00f7, # DIVISION SIGN 0x00d7: 0x25ca, # LOZENGE 0x00d8: 0xf8ff, # Apple logo 0x00d9: 0x00a9, # COPYRIGHT SIGN 0x00da: 0x2044, # FRACTION SLASH 0x00db: 0x20ac, # EURO SIGN 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x00de: 0x00c6, # LATIN CAPITAL LETTER AE 0x00df: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00e0: 0x2013, # EN DASH 0x00e1: 0x00b7, # MIDDLE DOT 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00e4: 0x2030, # PER MILLE SIGN 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT 0x00f7: 0x02dc, # SMALL TILDE 0x00f8: 0x00af, # MACRON 0x00f9: 0x03c0, # GREEK SMALL LETTER PI 0x00fa: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00fb: 0x02da, # RING ABOVE 0x00fc: 
0x00b8, # CEDILLA 0x00fd: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00fe: 0x00e6, # LATIN SMALL LETTER AE 0x00ff: 0x02c7, # CARON }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> CONTROL CHARACTER u'\x01' # 0x0001 -> CONTROL CHARACTER u'\x02' # 0x0002 -> CONTROL CHARACTER u'\x03' # 0x0003 -> CONTROL CHARACTER u'\x04' # 0x0004 -> CONTROL CHARACTER u'\x05' # 0x0005 -> CONTROL CHARACTER u'\x06' # 0x0006 -> CONTROL CHARACTER u'\x07' # 0x0007 -> CONTROL CHARACTER u'\x08' # 0x0008 -> CONTROL CHARACTER u'\t' # 0x0009 -> CONTROL CHARACTER u'\n' # 0x000a -> CONTROL CHARACTER u'\x0b' # 0x000b -> CONTROL CHARACTER u'\x0c' # 0x000c -> CONTROL CHARACTER u'\r' # 0x000d -> CONTROL CHARACTER u'\x0e' # 0x000e -> CONTROL CHARACTER u'\x0f' # 0x000f -> CONTROL CHARACTER u'\x10' # 0x0010 -> CONTROL CHARACTER u'\x11' # 0x0011 -> CONTROL CHARACTER u'\x12' # 0x0012 -> CONTROL CHARACTER u'\x13' # 0x0013 -> CONTROL CHARACTER u'\x14' # 0x0014 -> CONTROL CHARACTER u'\x15' # 0x0015 -> CONTROL CHARACTER u'\x16' # 0x0016 -> CONTROL CHARACTER u'\x17' # 0x0017 -> CONTROL CHARACTER u'\x18' # 0x0018 -> CONTROL CHARACTER u'\x19' # 0x0019 -> CONTROL CHARACTER u'\x1a' # 0x001a -> CONTROL CHARACTER u'\x1b' # 0x001b -> CONTROL CHARACTER u'\x1c' # 0x001c -> CONTROL CHARACTER u'\x1d' # 0x001d -> CONTROL CHARACTER u'\x1e' # 0x001e -> CONTROL CHARACTER u'\x1f' # 0x001f -> CONTROL CHARACTER u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' 
# 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL 
LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> CONTROL CHARACTER u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE u'\xec' # 0x0093 -> 
LATIN SMALL LETTER I WITH GRAVE u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH GRAVE u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS u'\u2020' # 0x00a0 -> DAGGER u'\xb0' # 0x00a1 -> DEGREE SIGN u'\xa2' # 0x00a2 -> CENT SIGN u'\xa3' # 0x00a3 -> POUND SIGN u'\xa7' # 0x00a4 -> SECTION SIGN u'\u2022' # 0x00a5 -> BULLET u'\xb6' # 0x00a6 -> PILCROW SIGN u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S u'\xae' # 0x00a8 -> REGISTERED SIGN u'\u0160' # 0x00a9 -> LATIN CAPITAL LETTER S WITH CARON u'\u2122' # 0x00aa -> TRADE MARK SIGN u'\xb4' # 0x00ab -> ACUTE ACCENT u'\xa8' # 0x00ac -> DIAERESIS u'\u2260' # 0x00ad -> NOT EQUAL TO u'\u017d' # 0x00ae -> LATIN CAPITAL LETTER Z WITH CARON u'\xd8' # 0x00af -> LATIN CAPITAL LETTER O WITH STROKE u'\u221e' # 0x00b0 -> INFINITY u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO u'\u2206' # 0x00b4 -> INCREMENT u'\xb5' # 0x00b5 -> MICRO SIGN u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL u'\u2211' # 0x00b7 -> N-ARY SUMMATION u'\u220f' # 0x00b8 -> N-ARY PRODUCT u'\u0161' # 0x00b9 -> LATIN SMALL LETTER S WITH CARON u'\u222b' # 0x00ba -> INTEGRAL u'\xaa' # 0x00bb -> FEMININE ORDINAL INDICATOR u'\xba' # 0x00bc -> MASCULINE ORDINAL INDICATOR u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA u'\u017e' # 0x00be -> LATIN SMALL LETTER Z WITH CARON u'\xf8' # 0x00bf -> LATIN SMALL LETTER O WITH STROKE u'\xbf' # 0x00c0 -> INVERTED QUESTION 
MARK u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK u'\xac' # 0x00c2 -> NOT SIGN u'\u221a' # 0x00c3 -> SQUARE ROOT u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK u'\u2248' # 0x00c5 -> ALMOST EQUAL TO u'\u0106' # 0x00c6 -> LATIN CAPITAL LETTER C WITH ACUTE u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u010c' # 0x00c8 -> LATIN CAPITAL LETTER C WITH CARON u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0x00ca -> NO-BREAK SPACE u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE u'\u0110' # 0x00d0 -> LATIN CAPITAL LETTER D WITH STROKE u'\u2014' # 0x00d1 -> EM DASH u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0x00d6 -> DIVISION SIGN u'\u25ca' # 0x00d7 -> LOZENGE u'\uf8ff' # 0x00d8 -> Apple logo u'\xa9' # 0x00d9 -> COPYRIGHT SIGN u'\u2044' # 0x00da -> FRACTION SLASH u'\u20ac' # 0x00db -> EURO SIGN u'\u2039' # 0x00dc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u203a' # 0x00dd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\xc6' # 0x00de -> LATIN CAPITAL LETTER AE u'\xbb' # 0x00df -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2013' # 0x00e0 -> EN DASH u'\xb7' # 0x00e1 -> MIDDLE DOT u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK u'\u2030' # 0x00e4 -> PER MILLE SIGN u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\u0107' # 0x00e6 -> LATIN SMALL LETTER C WITH ACUTE u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE u'\u010d' # 0x00e8 -> LATIN SMALL LETTER C WITH CARON u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0x00eb -> LATIN CAPITAL 
LETTER I WITH CIRCUMFLEX u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\u0111' # 0x00f0 -> LATIN SMALL LETTER D WITH STROKE u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xda' # 0x00f2 -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE u'\u0131' # 0x00f5 -> LATIN SMALL LETTER DOTLESS I u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u02dc' # 0x00f7 -> SMALL TILDE u'\xaf' # 0x00f8 -> MACRON u'\u03c0' # 0x00f9 -> GREEK SMALL LETTER PI u'\xcb' # 0x00fa -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\u02da' # 0x00fb -> RING ABOVE u'\xb8' # 0x00fc -> CEDILLA u'\xca' # 0x00fd -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xe6' # 0x00fe -> LATIN SMALL LETTER AE u'\u02c7' # 0x00ff -> CARON ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # CONTROL CHARACTER 0x0001: 0x0001, # CONTROL CHARACTER 0x0002: 0x0002, # CONTROL CHARACTER 0x0003: 0x0003, # CONTROL CHARACTER 0x0004: 0x0004, # CONTROL CHARACTER 0x0005: 0x0005, # CONTROL CHARACTER 0x0006: 0x0006, # CONTROL CHARACTER 0x0007: 0x0007, # CONTROL CHARACTER 0x0008: 0x0008, # CONTROL CHARACTER 0x0009: 0x0009, # CONTROL CHARACTER 0x000a: 0x000a, # CONTROL CHARACTER 0x000b: 0x000b, # CONTROL CHARACTER 0x000c: 0x000c, # CONTROL CHARACTER 0x000d: 0x000d, # CONTROL CHARACTER 0x000e: 0x000e, # CONTROL CHARACTER 0x000f: 0x000f, # CONTROL CHARACTER 0x0010: 0x0010, # CONTROL CHARACTER 0x0011: 0x0011, # CONTROL CHARACTER 0x0012: 0x0012, # CONTROL CHARACTER 0x0013: 0x0013, # CONTROL CHARACTER 0x0014: 0x0014, # CONTROL CHARACTER 0x0015: 0x0015, # CONTROL CHARACTER 0x0016: 0x0016, # CONTROL CHARACTER 0x0017: 0x0017, # CONTROL CHARACTER 0x0018: 0x0018, # CONTROL CHARACTER 0x0019: 0x0019, # CONTROL CHARACTER 0x001a: 0x001a, # CONTROL CHARACTER 
0x001b: 0x001b, # CONTROL CHARACTER 0x001c: 0x001c, # CONTROL CHARACTER 0x001d: 0x001d, # CONTROL CHARACTER 0x001e: 0x001e, # CONTROL CHARACTER 0x001f: 0x001f, # CONTROL CHARACTER 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # 
LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # CONTROL CHARACTER 0x00a0: 0x00ca, # NO-BREAK SPACE 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK 0x00a2: 0x00a2, # CENT SIGN 0x00a3: 0x00a3, # POUND SIGN 0x00a7: 0x00a4, # SECTION SIGN 0x00a8: 0x00ac, # DIAERESIS 0x00a9: 0x00d9, # COPYRIGHT SIGN 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00c2, # NOT SIGN 0x00ae: 0x00a8, # 
REGISTERED SIGN 0x00af: 0x00f8, # MACRON 0x00b0: 0x00a1, # DEGREE SIGN 0x00b1: 0x00b1, # PLUS-MINUS SIGN 0x00b4: 0x00ab, # ACUTE ACCENT 0x00b5: 0x00b5, # MICRO SIGN 0x00b6: 0x00a6, # PILCROW SIGN 0x00b7: 0x00e1, # MIDDLE DOT 0x00b8: 0x00fc, # CEDILLA 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR 0x00bb: 0x00df, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bf: 0x00c0, # INVERTED QUESTION MARK 0x00c0: 0x00cb, # LATIN CAPITAL LETTER A WITH GRAVE 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00c6: 0x00de, # LATIN CAPITAL LETTER AE 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE 0x00ca: 0x00fd, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00cb: 0x00fa, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE 0x00d2: 0x00f1, # LATIN CAPITAL LETTER O WITH GRAVE 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00d8: 0x00af, # LATIN CAPITAL LETTER O WITH STROKE 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE 0x00db: 0x00f3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 
0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE 0x00e6: 0x00fe, # LATIN SMALL LETTER AE 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x00d6, # DIVISION SIGN 0x00f8: 0x00bf, # LATIN SMALL LETTER O WITH STROKE 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS 0x0106: 0x00c6, # LATIN CAPITAL LETTER C WITH ACUTE 0x0107: 0x00e6, # LATIN SMALL LETTER C WITH ACUTE 0x010c: 0x00c8, # LATIN CAPITAL LETTER C WITH CARON 0x010d: 0x00e8, # LATIN SMALL LETTER C WITH CARON 0x0110: 0x00d0, # LATIN CAPITAL LETTER D WITH STROKE 0x0111: 0x00f0, # LATIN SMALL LETTER D WITH STROKE 0x0131: 0x00f5, # LATIN SMALL LETTER DOTLESS I 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE OE 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE 0x0160: 0x00a9, # LATIN CAPITAL LETTER S WITH CARON 0x0161: 0x00b9, # LATIN SMALL LETTER S WITH CARON 0x017d: 0x00ae, # LATIN CAPITAL LETTER Z WITH CARON 0x017e: 0x00be, # LATIN SMALL LETTER Z WITH CARON 0x0192: 0x00c4, # LATIN SMALL LETTER 
F WITH HOOK 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT 0x02c7: 0x00ff, # CARON 0x02da: 0x00fb, # RING ABOVE 0x02dc: 0x00f7, # SMALL TILDE 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA 0x03c0: 0x00f9, # GREEK SMALL LETTER PI 0x2013: 0x00e0, # EN DASH 0x2014: 0x00d1, # EM DASH 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK 0x201c: 0x00d2, # LEFT DOUBLE QUOTATION MARK 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x00a0, # DAGGER 0x2022: 0x00a5, # BULLET 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS 0x2030: 0x00e4, # PER MILLE SIGN 0x2039: 0x00dc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x203a: 0x00dd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x2044: 0x00da, # FRACTION SLASH 0x20ac: 0x00db, # EURO SIGN 0x2122: 0x00aa, # TRADE MARK SIGN 0x2202: 0x00b6, # PARTIAL DIFFERENTIAL 0x2206: 0x00b4, # INCREMENT 0x220f: 0x00b8, # N-ARY PRODUCT 0x2211: 0x00b7, # N-ARY SUMMATION 0x221a: 0x00c3, # SQUARE ROOT 0x221e: 0x00b0, # INFINITY 0x222b: 0x00ba, # INTEGRAL 0x2248: 0x00c5, # ALMOST EQUAL TO 0x2260: 0x00ad, # NOT EQUAL TO 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO 0x25ca: 0x00d7, # LOZENGE 0xf8ff: 0x00d8, # Apple logo } --- NEW FILE: mac_farsi.py --- """ Python Character Mapping Codec generated from 'VENDORS/APPLE/FARSI.TXT' with gencodec.py. 
"""#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00a0, # NO-BREAK SPACE, right-left 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH 
DIAERESIS 0x009b: 0x00f7, # DIVISION SIGN, right-left 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00a0: 0x0020, # SPACE, right-left 0x00a1: 0x0021, # EXCLAMATION MARK, right-left 0x00a2: 0x0022, # QUOTATION MARK, right-left 0x00a3: 0x0023, # NUMBER SIGN, right-left 0x00a4: 0x0024, # DOLLAR SIGN, right-left 0x00a5: 0x066a, # ARABIC PERCENT SIGN 0x00a6: 0x0026, # AMPERSAND, right-left 0x00a7: 0x0027, # APOSTROPHE, right-left 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left 0x00aa: 0x002a, # ASTERISK, right-left 0x00ab: 0x002b, # PLUS SIGN, right-left 0x00ac: 0x060c, # ARABIC COMMA 0x00ad: 0x002d, # HYPHEN-MINUS, right-left 0x00ae: 0x002e, # FULL STOP, right-left 0x00af: 0x002f, # SOLIDUS, right-left 0x00b0: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) 0x00b1: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) 0x00b2: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) 0x00b3: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) 0x00b4: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) 0x00b5: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) 0x00b6: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) 0x00b7: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) 0x00b8: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) 0x00b9: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) 0x00ba: 0x003a, # COLON, right-left 0x00bb: 0x061b, # ARABIC SEMICOLON 0x00bc: 0x003c, # LESS-THAN SIGN, right-left 0x00bd: 0x003d, # EQUALS SIGN, right-left 0x00be: 0x003e, # GREATER-THAN SIGN, right-left 0x00bf: 0x061f, # ARABIC QUESTION MARK 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED 
PROPELLER ASTERISK, right-left 0x00c1: 0x0621, # ARABIC LETTER HAMZA 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE 0x00c7: 0x0627, # ARABIC LETTER ALEF 0x00c8: 0x0628, # ARABIC LETTER BEH 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA 0x00ca: 0x062a, # ARABIC LETTER TEH 0x00cb: 0x062b, # ARABIC LETTER THEH 0x00cc: 0x062c, # ARABIC LETTER JEEM 0x00cd: 0x062d, # ARABIC LETTER HAH 0x00ce: 0x062e, # ARABIC LETTER KHAH 0x00cf: 0x062f, # ARABIC LETTER DAL 0x00d0: 0x0630, # ARABIC LETTER THAL 0x00d1: 0x0631, # ARABIC LETTER REH 0x00d2: 0x0632, # ARABIC LETTER ZAIN 0x00d3: 0x0633, # ARABIC LETTER SEEN 0x00d4: 0x0634, # ARABIC LETTER SHEEN 0x00d5: 0x0635, # ARABIC LETTER SAD 0x00d6: 0x0636, # ARABIC LETTER DAD 0x00d7: 0x0637, # ARABIC LETTER TAH 0x00d8: 0x0638, # ARABIC LETTER ZAH 0x00d9: 0x0639, # ARABIC LETTER AIN 0x00da: 0x063a, # ARABIC LETTER GHAIN 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left 0x00dc: 0x005c, # REVERSE SOLIDUS, right-left 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left 0x00df: 0x005f, # LOW LINE, right-left 0x00e0: 0x0640, # ARABIC TATWEEL 0x00e1: 0x0641, # ARABIC LETTER FEH 0x00e2: 0x0642, # ARABIC LETTER QAF 0x00e3: 0x0643, # ARABIC LETTER KAF 0x00e4: 0x0644, # ARABIC LETTER LAM 0x00e5: 0x0645, # ARABIC LETTER MEEM 0x00e6: 0x0646, # ARABIC LETTER NOON 0x00e7: 0x0647, # ARABIC LETTER HEH 0x00e8: 0x0648, # ARABIC LETTER WAW 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA 0x00ea: 0x064a, # ARABIC LETTER YEH 0x00eb: 0x064b, # ARABIC FATHATAN 0x00ec: 0x064c, # ARABIC DAMMATAN 0x00ed: 0x064d, # ARABIC KASRATAN 0x00ee: 0x064e, # ARABIC FATHA 0x00ef: 0x064f, # ARABIC DAMMA 0x00f0: 0x0650, # ARABIC KASRA 0x00f1: 0x0651, # ARABIC SHADDA 0x00f2: 0x0652, # ARABIC SUKUN 0x00f3: 0x067e, # ARABIC 
LETTER PEH 0x00f4: 0x0679, # ARABIC LETTER TTEH 0x00f5: 0x0686, # ARABIC LETTER TCHEH 0x00f6: 0x06d5, # ARABIC LETTER AE 0x00f7: 0x06a4, # ARABIC LETTER VEH 0x00f8: 0x06af, # ARABIC LETTER GAF 0x00f9: 0x0688, # ARABIC LETTER DDAL 0x00fa: 0x0691, # ARABIC LETTER RREH 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left 0x00fc: 0x007c, # VERTICAL LINE, right-left 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left 0x00fe: 0x0698, # ARABIC LETTER JEH 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> CONTROL CHARACTER u'\x01' # 0x0001 -> CONTROL CHARACTER u'\x02' # 0x0002 -> CONTROL CHARACTER u'\x03' # 0x0003 -> CONTROL CHARACTER u'\x04' # 0x0004 -> CONTROL CHARACTER u'\x05' # 0x0005 -> CONTROL CHARACTER u'\x06' # 0x0006 -> CONTROL CHARACTER u'\x07' # 0x0007 -> CONTROL CHARACTER u'\x08' # 0x0008 -> CONTROL CHARACTER u'\t' # 0x0009 -> CONTROL CHARACTER u'\n' # 0x000a -> CONTROL CHARACTER u'\x0b' # 0x000b -> CONTROL CHARACTER u'\x0c' # 0x000c -> CONTROL CHARACTER u'\r' # 0x000d -> CONTROL CHARACTER u'\x0e' # 0x000e -> CONTROL CHARACTER u'\x0f' # 0x000f -> CONTROL CHARACTER u'\x10' # 0x0010 -> CONTROL CHARACTER u'\x11' # 0x0011 -> CONTROL CHARACTER u'\x12' # 0x0012 -> CONTROL CHARACTER u'\x13' # 0x0013 -> CONTROL CHARACTER u'\x14' # 0x0014 -> CONTROL CHARACTER u'\x15' # 0x0015 -> CONTROL CHARACTER u'\x16' # 0x0016 -> CONTROL CHARACTER u'\x17' # 0x0017 -> CONTROL CHARACTER u'\x18' # 0x0018 -> CONTROL CHARACTER u'\x19' # 0x0019 -> CONTROL CHARACTER u'\x1a' # 0x001a -> CONTROL CHARACTER u'\x1b' # 0x001b -> CONTROL CHARACTER u'\x1c' # 0x001c -> CONTROL CHARACTER u'\x1d' # 0x001d -> CONTROL CHARACTER u'\x1e' # 0x001e -> CONTROL CHARACTER u'\x1f' # 0x001f -> CONTROL CHARACTER u' ' # 0x0020 -> SPACE, left-right u'!' 
# 0x0021 -> EXCLAMATION MARK, left-right u'"' # 0x0022 -> QUOTATION MARK, left-right u'#' # 0x0023 -> NUMBER SIGN, left-right u'$' # 0x0024 -> DOLLAR SIGN, left-right u'%' # 0x0025 -> PERCENT SIGN, left-right u'&' # 0x0026 -> AMPERSAND, left-right u"'" # 0x0027 -> APOSTROPHE, left-right u'(' # 0x0028 -> LEFT PARENTHESIS, left-right u')' # 0x0029 -> RIGHT PARENTHESIS, left-right u'*' # 0x002a -> ASTERISK, left-right u'+' # 0x002b -> PLUS SIGN, left-right u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR u'-' # 0x002d -> HYPHEN-MINUS, left-right u'.' # 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR u'/' # 0x002f -> SOLIDUS, left-right u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE u':' # 0x003a -> COLON, left-right u';' # 0x003b -> SEMICOLON, left-right u'<' # 0x003c -> LESS-THAN SIGN, left-right u'=' # 0x003d -> EQUALS SIGN, left-right 
u'>' # 0x003e -> GREATER-THAN SIGN, left-right u'?' # 0x003f -> QUESTION MARK, left-right u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right u'\\' # 0x005c -> REVERSE SOLIDUS, left-right u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right u'_' # 0x005f -> LOW LINE, left-right u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 
0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET, left-right u'|' # 0x007c -> VERTICAL LINE, left-right u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> CONTROL CHARACTER u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, 
right-left u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0x009b -> DIVISION SIGN, right-left u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS u' ' # 0x00a0 -> SPACE, right-left u'!' # 0x00a1 -> EXCLAMATION MARK, right-left u'"' # 0x00a2 -> QUOTATION MARK, right-left u'#' # 0x00a3 -> NUMBER SIGN, right-left u'$' # 0x00a4 -> DOLLAR SIGN, right-left u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN u'&' # 0x00a6 -> AMPERSAND, right-left u"'" # 0x00a7 -> APOSTROPHE, right-left u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left u'*' # 0x00aa -> ASTERISK, right-left u'+' # 0x00ab -> PLUS SIGN, right-left u'\u060c' # 0x00ac -> ARABIC COMMA u'-' # 0x00ad -> HYPHEN-MINUS, right-left u'.' # 0x00ae -> FULL STOP, right-left u'/' # 0x00af -> SOLIDUS, right-left u'\u06f0' # 0x00b0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) u'\u06f1' # 0x00b1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) u'\u06f2' # 0x00b2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) u'\u06f3' # 0x00b3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) u'\u06f4' # 0x00b4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) u'\u06f5' # 0x00b5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) u'\u06f6' # 0x00b6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) u'\u06f7' # 0x00b7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) u'\u06f8' # 0x00b8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) u'\u06f9' # 0x00b9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) u':' # 0x00ba -> COLON, right-left u'\u061b' # 0x00bb -> ARABIC SEMICOLON u'<' # 0x00bc -> LESS-THAN SIGN, right-left u'=' # 0x00bd 
-> EQUALS SIGN, right-left u'>' # 0x00be -> GREATER-THAN SIGN, right-left u'\u061f' # 0x00bf -> ARABIC QUESTION MARK u'\u274a' # 0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF u'\u0628' # 0x00c8 -> ARABIC LETTER BEH u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA u'\u062a' # 0x00ca -> ARABIC LETTER TEH u'\u062b' # 0x00cb -> ARABIC LETTER THEH u'\u062c' # 0x00cc -> ARABIC LETTER JEEM u'\u062d' # 0x00cd -> ARABIC LETTER HAH u'\u062e' # 0x00ce -> ARABIC LETTER KHAH u'\u062f' # 0x00cf -> ARABIC LETTER DAL u'\u0630' # 0x00d0 -> ARABIC LETTER THAL u'\u0631' # 0x00d1 -> ARABIC LETTER REH u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN u'\u0635' # 0x00d5 -> ARABIC LETTER SAD u'\u0636' # 0x00d6 -> ARABIC LETTER DAD u'\u0637' # 0x00d7 -> ARABIC LETTER TAH u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH u'\u0639' # 0x00d9 -> ARABIC LETTER AIN u'\u063a' # 0x00da -> ARABIC LETTER GHAIN u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left u'_' # 0x00df -> LOW LINE, right-left u'\u0640' # 0x00e0 -> ARABIC TATWEEL u'\u0641' # 0x00e1 -> ARABIC LETTER FEH u'\u0642' # 0x00e2 -> ARABIC LETTER QAF u'\u0643' # 0x00e3 -> ARABIC LETTER KAF u'\u0644' # 0x00e4 -> ARABIC LETTER LAM u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM u'\u0646' # 0x00e6 -> ARABIC LETTER NOON u'\u0647' # 0x00e7 -> ARABIC LETTER HEH u'\u0648' # 0x00e8 -> ARABIC LETTER WAW u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA u'\u064a' # 0x00ea 
-> ARABIC LETTER YEH u'\u064b' # 0x00eb -> ARABIC FATHATAN u'\u064c' # 0x00ec -> ARABIC DAMMATAN u'\u064d' # 0x00ed -> ARABIC KASRATAN u'\u064e' # 0x00ee -> ARABIC FATHA u'\u064f' # 0x00ef -> ARABIC DAMMA u'\u0650' # 0x00f0 -> ARABIC KASRA u'\u0651' # 0x00f1 -> ARABIC SHADDA u'\u0652' # 0x00f2 -> ARABIC SUKUN u'\u067e' # 0x00f3 -> ARABIC LETTER PEH u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH u'\u06d5' # 0x00f6 -> ARABIC LETTER AE u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH u'\u06af' # 0x00f8 -> ARABIC LETTER GAF u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL u'\u0691' # 0x00fa -> ARABIC LETTER RREH u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left u'|' # 0x00fc -> VERTICAL LINE, right-left u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left u'\u0698' # 0x00fe -> ARABIC LETTER JEH u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # CONTROL CHARACTER 0x0001: 0x0001, # CONTROL CHARACTER 0x0002: 0x0002, # CONTROL CHARACTER 0x0003: 0x0003, # CONTROL CHARACTER 0x0004: 0x0004, # CONTROL CHARACTER 0x0005: 0x0005, # CONTROL CHARACTER 0x0006: 0x0006, # CONTROL CHARACTER 0x0007: 0x0007, # CONTROL CHARACTER 0x0008: 0x0008, # CONTROL CHARACTER 0x0009: 0x0009, # CONTROL CHARACTER 0x000a: 0x000a, # CONTROL CHARACTER 0x000b: 0x000b, # CONTROL CHARACTER 0x000c: 0x000c, # CONTROL CHARACTER 0x000d: 0x000d, # CONTROL CHARACTER 0x000e: 0x000e, # CONTROL CHARACTER 0x000f: 0x000f, # CONTROL CHARACTER 0x0010: 0x0010, # CONTROL CHARACTER 0x0011: 0x0011, # CONTROL CHARACTER 0x0012: 0x0012, # CONTROL CHARACTER 0x0013: 0x0013, # CONTROL CHARACTER 0x0014: 0x0014, # CONTROL CHARACTER 0x0015: 0x0015, # CONTROL CHARACTER 0x0016: 0x0016, # CONTROL CHARACTER 0x0017: 0x0017, # CONTROL CHARACTER 0x0018: 0x0018, # CONTROL CHARACTER 0x0019: 0x0019, # CONTROL CHARACTER 0x001a: 0x001a, # CONTROL CHARACTER 0x001b: 0x001b, # CONTROL CHARACTER 0x001c: 0x001c, # CONTROL CHARACTER 0x001d: 0x001d, # CONTROL CHARACTER 0x001e: 0x001e, # 
CONTROL CHARACTER 0x001f: 0x001f, # CONTROL CHARACTER 0x0020: 0x0020, # SPACE, left-right 0x0020: 0x00a0, # SPACE, right-left 0x0021: 0x0021, # EXCLAMATION MARK, left-right 0x0021: 0x00a1, # EXCLAMATION MARK, right-left 0x0022: 0x0022, # QUOTATION MARK, left-right 0x0022: 0x00a2, # QUOTATION MARK, right-left 0x0023: 0x0023, # NUMBER SIGN, left-right 0x0023: 0x00a3, # NUMBER SIGN, right-left 0x0024: 0x0024, # DOLLAR SIGN, left-right 0x0024: 0x00a4, # DOLLAR SIGN, right-left 0x0025: 0x0025, # PERCENT SIGN, left-right 0x0026: 0x0026, # AMPERSAND, left-right 0x0026: 0x00a6, # AMPERSAND, right-left 0x0027: 0x0027, # APOSTROPHE, left-right 0x0027: 0x00a7, # APOSTROPHE, right-left 0x0028: 0x0028, # LEFT PARENTHESIS, left-right 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left 0x0029: 0x0029, # RIGHT PARENTHESIS, left-right 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left 0x002a: 0x002a, # ASTERISK, left-right 0x002a: 0x00aa, # ASTERISK, right-left 0x002b: 0x002b, # PLUS SIGN, left-right 0x002b: 0x00ab, # PLUS SIGN, right-left 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR 0x002d: 0x002d, # HYPHEN-MINUS, left-right 0x002d: 0x00ad, # HYPHEN-MINUS, right-left 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR 0x002e: 0x00ae, # FULL STOP, right-left 0x002f: 0x002f, # SOLIDUS, left-right 0x002f: 0x00af, # SOLIDUS, right-left 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR 
0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE 0x003a: 0x003a, # COLON, left-right 0x003a: 0x00ba, # COLON, right-left 0x003b: 0x003b, # SEMICOLON, left-right 0x003c: 0x003c, # LESS-THAN SIGN, left-right 0x003c: 0x00bc, # LESS-THAN SIGN, right-left 0x003d: 0x003d, # EQUALS SIGN, left-right 0x003d: 0x00bd, # EQUALS SIGN, right-left 0x003e: 0x003e, # GREATER-THAN SIGN, left-right 0x003e: 0x00be, # GREATER-THAN SIGN, right-left 0x003f: 0x003f, # QUESTION MARK, left-right 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 
0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left 0x005c: 0x005c, # REVERSE SOLIDUS, left-right 0x005c: 0x00dc, # REVERSE SOLIDUS, right-left 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left 0x005f: 0x005f, # LOW LINE, left-right 0x005f: 0x00df, # LOW LINE, right-left 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left 0x007c: 0x007c, # VERTICAL LINE, left-right 0x007c: 0x00fc, # VERTICAL LINE, right-left 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # CONTROL CHARACTER 0x00a0: 0x0081, # NO-BREAK SPACE, right-left 0x00ab: 0x008c, # 
LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x009b, # DIVISION SIGN, right-left 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS 0x060c: 0x00ac, # ARABIC COMMA 0x061b: 0x00bb, # ARABIC SEMICOLON 0x061f: 0x00bf, # ARABIC QUESTION MARK 0x0621: 0x00c1, # ARABIC LETTER HAMZA 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE 0x0627: 0x00c7, # ARABIC LETTER ALEF 0x0628: 
0x00c8, # ARABIC LETTER BEH 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA 0x062a: 0x00ca, # ARABIC LETTER TEH 0x062b: 0x00cb, # ARABIC LETTER THEH 0x062c: 0x00cc, # ARABIC LETTER JEEM 0x062d: 0x00cd, # ARABIC LETTER HAH 0x062e: 0x00ce, # ARABIC LETTER KHAH 0x062f: 0x00cf, # ARABIC LETTER DAL 0x0630: 0x00d0, # ARABIC LETTER THAL 0x0631: 0x00d1, # ARABIC LETTER REH 0x0632: 0x00d2, # ARABIC LETTER ZAIN 0x0633: 0x00d3, # ARABIC LETTER SEEN 0x0634: 0x00d4, # ARABIC LETTER SHEEN 0x0635: 0x00d5, # ARABIC LETTER SAD 0x0636: 0x00d6, # ARABIC LETTER DAD 0x0637: 0x00d7, # ARABIC LETTER TAH 0x0638: 0x00d8, # ARABIC LETTER ZAH 0x0639: 0x00d9, # ARABIC LETTER AIN 0x063a: 0x00da, # ARABIC LETTER GHAIN 0x0640: 0x00e0, # ARABIC TATWEEL 0x0641: 0x00e1, # ARABIC LETTER FEH 0x0642: 0x00e2, # ARABIC LETTER QAF 0x0643: 0x00e3, # ARABIC LETTER KAF 0x0644: 0x00e4, # ARABIC LETTER LAM 0x0645: 0x00e5, # ARABIC LETTER MEEM 0x0646: 0x00e6, # ARABIC LETTER NOON 0x0647: 0x00e7, # ARABIC LETTER HEH 0x0648: 0x00e8, # ARABIC LETTER WAW 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA 0x064a: 0x00ea, # ARABIC LETTER YEH 0x064b: 0x00eb, # ARABIC FATHATAN 0x064c: 0x00ec, # ARABIC DAMMATAN 0x064d: 0x00ed, # ARABIC KASRATAN 0x064e: 0x00ee, # ARABIC FATHA 0x064f: 0x00ef, # ARABIC DAMMA 0x0650: 0x00f0, # ARABIC KASRA 0x0651: 0x00f1, # ARABIC SHADDA 0x0652: 0x00f2, # ARABIC SUKUN 0x066a: 0x00a5, # ARABIC PERCENT SIGN 0x0679: 0x00f4, # ARABIC LETTER TTEH 0x067e: 0x00f3, # ARABIC LETTER PEH 0x0686: 0x00f5, # ARABIC LETTER TCHEH 0x0688: 0x00f9, # ARABIC LETTER DDAL 0x0691: 0x00fa, # ARABIC LETTER RREH 0x0698: 0x00fe, # ARABIC LETTER JEH 0x06a4: 0x00f7, # ARABIC LETTER VEH 0x06af: 0x00f8, # ARABIC LETTER GAF 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE 0x06d5: 0x00f6, # ARABIC LETTER AE 0x06f0: 0x00b0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) 0x06f1: 0x00b1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) 0x06f2: 0x00b2, # EXTENDED 
ARABIC-INDIC DIGIT TWO, right-left (need override) 0x06f3: 0x00b3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) 0x06f4: 0x00b4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) 0x06f5: 0x00b5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) 0x06f6: 0x00b6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) 0x06f7: 0x00b7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) 0x06f8: 0x00b8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) 0x06f9: 0x00b9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left } --- NEW FILE: mac_romanian.py --- """ Python Character Mapping Codec generated from 'VENDORS/APPLE/ROMANIAN.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x008a: 0x00e4, # LATIN SMALL LETTER A 
WITH DIAERESIS 0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE 0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00a0: 0x2020, # DAGGER 0x00a1: 0x00b0, # DEGREE SIGN 0x00a4: 0x00a7, # SECTION SIGN 0x00a5: 0x2022, # BULLET 0x00a6: 0x00b6, # PILCROW SIGN 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S 0x00a8: 0x00ae, # REGISTERED SIGN 0x00aa: 0x2122, # TRADE MARK SIGN 0x00ab: 0x00b4, # ACUTE ACCENT 0x00ac: 0x00a8, # DIAERESIS 0x00ad: 0x2260, # NOT EQUAL TO 0x00ae: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE 0x00af: 0x0218, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later 0x00b0: 0x221e, # INFINITY 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO 0x00b4: 0x00a5, # YEN SIGN 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL 0x00b7: 0x2211, # N-ARY SUMMATION 0x00b8: 0x220f, # N-ARY PRODUCT 0x00b9: 0x03c0, # GREEK SMALL LETTER PI 0x00ba: 0x222b, # INTEGRAL 0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00bd: 
0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00be: 0x0103, # LATIN SMALL LETTER A WITH BREVE 0x00bf: 0x0219, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later 0x00c0: 0x00bf, # INVERTED QUESTION MARK 0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK 0x00c2: 0x00ac, # NOT SIGN 0x00c3: 0x221a, # SQUARE ROOT 0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00c5: 0x2248, # ALMOST EQUAL TO 0x00c6: 0x2206, # INCREMENT 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS 0x00ca: 0x00a0, # NO-BREAK SPACE 0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE 0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE 0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE 0x00cf: 0x0153, # LATIN SMALL LIGATURE OE 0x00d0: 0x2013, # EN DASH 0x00d1: 0x2014, # EM DASH 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK 0x00d6: 0x00f7, # DIVISION SIGN 0x00d7: 0x25ca, # LOZENGE 0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS 0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x00da: 0x2044, # FRACTION SLASH 0x00db: 0x20ac, # EURO SIGN 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x00de: 0x021a, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later 0x00df: 0x021b, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later 0x00e0: 0x2021, # DOUBLE DAGGER 0x00e1: 0x00b7, # MIDDLE DOT 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00e4: 0x2030, # PER MILLE SIGN 0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x00e8: 0x00cb, # LATIN CAPITAL 
LETTER E WITH DIAERESIS 0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00f0: 0xf8ff, # Apple logo 0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT 0x00f7: 0x02dc, # SMALL TILDE 0x00f8: 0x00af, # MACRON 0x00f9: 0x02d8, # BREVE 0x00fa: 0x02d9, # DOT ABOVE 0x00fb: 0x02da, # RING ABOVE 0x00fc: 0x00b8, # CEDILLA 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fe: 0x02db, # OGONEK 0x00ff: 0x02c7, # CARON }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> CONTROL CHARACTER u'\x01' # 0x0001 -> CONTROL CHARACTER u'\x02' # 0x0002 -> CONTROL CHARACTER u'\x03' # 0x0003 -> CONTROL CHARACTER u'\x04' # 0x0004 -> CONTROL CHARACTER u'\x05' # 0x0005 -> CONTROL CHARACTER u'\x06' # 0x0006 -> CONTROL CHARACTER u'\x07' # 0x0007 -> CONTROL CHARACTER u'\x08' # 0x0008 -> CONTROL CHARACTER u'\t' # 0x0009 -> CONTROL CHARACTER u'\n' # 0x000a -> CONTROL CHARACTER u'\x0b' # 0x000b -> CONTROL CHARACTER u'\x0c' # 0x000c -> CONTROL CHARACTER u'\r' # 0x000d -> CONTROL CHARACTER u'\x0e' # 0x000e -> CONTROL CHARACTER u'\x0f' # 0x000f -> CONTROL CHARACTER u'\x10' # 0x0010 -> CONTROL CHARACTER u'\x11' # 0x0011 -> CONTROL CHARACTER u'\x12' # 0x0012 -> CONTROL CHARACTER u'\x13' # 0x0013 -> CONTROL CHARACTER u'\x14' # 0x0014 -> CONTROL CHARACTER u'\x15' # 0x0015 -> CONTROL CHARACTER u'\x16' # 0x0016 -> CONTROL CHARACTER u'\x17' # 0x0017 -> CONTROL CHARACTER u'\x18' # 0x0018 -> CONTROL CHARACTER u'\x19' # 0x0019 -> 
CONTROL CHARACTER u'\x1a' # 0x001a -> CONTROL CHARACTER u'\x1b' # 0x001b -> CONTROL CHARACTER u'\x1c' # 0x001c -> CONTROL CHARACTER u'\x1d' # 0x001d -> CONTROL CHARACTER u'\x1e' # 0x001e -> CONTROL CHARACTER u'\x1f' # 0x001f -> CONTROL CHARACTER u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' 
# 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> 
LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> CONTROL CHARACTER u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x0081 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe3' # 0x008b -> LATIN SMALL LETTER A WITH TILDE u'\xe5' # 0x008c -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE u'\xec' # 0x0093 -> LATIN SMALL LETTER I WITH GRAVE u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x0098 -> LATIN SMALL LETTER O WITH GRAVE u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf5' # 0x009b -> LATIN SMALL LETTER O WITH TILDE u'\xfa' # 
0x009c -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS u'\u2020' # 0x00a0 -> DAGGER u'\xb0' # 0x00a1 -> DEGREE SIGN u'\xa2' # 0x00a2 -> CENT SIGN u'\xa3' # 0x00a3 -> POUND SIGN u'\xa7' # 0x00a4 -> SECTION SIGN u'\u2022' # 0x00a5 -> BULLET u'\xb6' # 0x00a6 -> PILCROW SIGN u'\xdf' # 0x00a7 -> LATIN SMALL LETTER SHARP S u'\xae' # 0x00a8 -> REGISTERED SIGN u'\xa9' # 0x00a9 -> COPYRIGHT SIGN u'\u2122' # 0x00aa -> TRADE MARK SIGN u'\xb4' # 0x00ab -> ACUTE ACCENT u'\xa8' # 0x00ac -> DIAERESIS u'\u2260' # 0x00ad -> NOT EQUAL TO u'\u0102' # 0x00ae -> LATIN CAPITAL LETTER A WITH BREVE u'\u0218' # 0x00af -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later u'\u221e' # 0x00b0 -> INFINITY u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN u'\u2264' # 0x00b2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0x00b3 -> GREATER-THAN OR EQUAL TO u'\xa5' # 0x00b4 -> YEN SIGN u'\xb5' # 0x00b5 -> MICRO SIGN u'\u2202' # 0x00b6 -> PARTIAL DIFFERENTIAL u'\u2211' # 0x00b7 -> N-ARY SUMMATION u'\u220f' # 0x00b8 -> N-ARY PRODUCT u'\u03c0' # 0x00b9 -> GREEK SMALL LETTER PI u'\u222b' # 0x00ba -> INTEGRAL u'\xaa' # 0x00bb -> FEMININE ORDINAL INDICATOR u'\xba' # 0x00bc -> MASCULINE ORDINAL INDICATOR u'\u03a9' # 0x00bd -> GREEK CAPITAL LETTER OMEGA u'\u0103' # 0x00be -> LATIN SMALL LETTER A WITH BREVE u'\u0219' # 0x00bf -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later u'\xbf' # 0x00c0 -> INVERTED QUESTION MARK u'\xa1' # 0x00c1 -> INVERTED EXCLAMATION MARK u'\xac' # 0x00c2 -> NOT SIGN u'\u221a' # 0x00c3 -> SQUARE ROOT u'\u0192' # 0x00c4 -> LATIN SMALL LETTER F WITH HOOK u'\u2248' # 0x00c5 -> ALMOST EQUAL TO u'\u2206' # 0x00c6 -> INCREMENT u'\xab' # 0x00c7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00c8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2026' # 0x00c9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0x00ca -> NO-BREAK SPACE 
u'\xc0' # 0x00cb -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc3' # 0x00cc -> LATIN CAPITAL LETTER A WITH TILDE u'\xd5' # 0x00cd -> LATIN CAPITAL LETTER O WITH TILDE u'\u0152' # 0x00ce -> LATIN CAPITAL LIGATURE OE u'\u0153' # 0x00cf -> LATIN SMALL LIGATURE OE u'\u2013' # 0x00d0 -> EN DASH u'\u2014' # 0x00d1 -> EM DASH u'\u201c' # 0x00d2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x00d3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0x00d4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x00d5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0x00d6 -> DIVISION SIGN u'\u25ca' # 0x00d7 -> LOZENGE u'\xff' # 0x00d8 -> LATIN SMALL LETTER Y WITH DIAERESIS u'\u0178' # 0x00d9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS u'\u2044' # 0x00da -> FRACTION SLASH u'\u20ac' # 0x00db -> EURO SIGN u'\u2039' # 0x00dc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u203a' # 0x00dd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\u021a' # 0x00de -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later u'\u021b' # 0x00df -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later u'\u2021' # 0x00e0 -> DOUBLE DAGGER u'\xb7' # 0x00e1 -> MIDDLE DOT u'\u201a' # 0x00e2 -> SINGLE LOW-9 QUOTATION MARK u'\u201e' # 0x00e3 -> DOUBLE LOW-9 QUOTATION MARK u'\u2030' # 0x00e4 -> PER MILLE SIGN u'\xc2' # 0x00e5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xca' # 0x00e6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xc1' # 0x00e7 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xcb' # 0x00e8 -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xc8' # 0x00e9 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xcd' # 0x00ea -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0x00eb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0x00ec -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0x00ed -> LATIN CAPITAL LETTER I WITH GRAVE u'\xd3' # 0x00ee -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0x00ef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\uf8ff' # 0x00f0 -> Apple logo u'\xd2' # 0x00f1 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xda' # 0x00f2 
-> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0x00f3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xd9' # 0x00f4 -> LATIN CAPITAL LETTER U WITH GRAVE u'\u0131' # 0x00f5 -> LATIN SMALL LETTER DOTLESS I u'\u02c6' # 0x00f6 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u02dc' # 0x00f7 -> SMALL TILDE u'\xaf' # 0x00f8 -> MACRON u'\u02d8' # 0x00f9 -> BREVE u'\u02d9' # 0x00fa -> DOT ABOVE u'\u02da' # 0x00fb -> RING ABOVE u'\xb8' # 0x00fc -> CEDILLA u'\u02dd' # 0x00fd -> DOUBLE ACUTE ACCENT u'\u02db' # 0x00fe -> OGONEK u'\u02c7' # 0x00ff -> CARON ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # CONTROL CHARACTER 0x0001: 0x0001, # CONTROL CHARACTER 0x0002: 0x0002, # CONTROL CHARACTER 0x0003: 0x0003, # CONTROL CHARACTER 0x0004: 0x0004, # CONTROL CHARACTER 0x0005: 0x0005, # CONTROL CHARACTER 0x0006: 0x0006, # CONTROL CHARACTER 0x0007: 0x0007, # CONTROL CHARACTER 0x0008: 0x0008, # CONTROL CHARACTER 0x0009: 0x0009, # CONTROL CHARACTER 0x000a: 0x000a, # CONTROL CHARACTER 0x000b: 0x000b, # CONTROL CHARACTER 0x000c: 0x000c, # CONTROL CHARACTER 0x000d: 0x000d, # CONTROL CHARACTER 0x000e: 0x000e, # CONTROL CHARACTER 0x000f: 0x000f, # CONTROL CHARACTER 0x0010: 0x0010, # CONTROL CHARACTER 0x0011: 0x0011, # CONTROL CHARACTER 0x0012: 0x0012, # CONTROL CHARACTER 0x0013: 0x0013, # CONTROL CHARACTER 0x0014: 0x0014, # CONTROL CHARACTER 0x0015: 0x0015, # CONTROL CHARACTER 0x0016: 0x0016, # CONTROL CHARACTER 0x0017: 0x0017, # CONTROL CHARACTER 0x0018: 0x0018, # CONTROL CHARACTER 0x0019: 0x0019, # CONTROL CHARACTER 0x001a: 0x001a, # CONTROL CHARACTER 0x001b: 0x001b, # CONTROL CHARACTER 0x001c: 0x001c, # CONTROL CHARACTER 0x001d: 0x001d, # CONTROL CHARACTER 0x001e: 0x001e, # CONTROL CHARACTER 0x001f: 0x001f, # CONTROL CHARACTER 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT 
PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 
0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # CONTROL CHARACTER 0x00a0: 0x00ca, # NO-BREAK SPACE 0x00a1: 0x00c1, # INVERTED EXCLAMATION MARK 0x00a2: 0x00a2, # CENT SIGN 0x00a3: 0x00a3, # POUND SIGN 0x00a5: 0x00b4, # YEN SIGN 0x00a7: 0x00a4, # SECTION SIGN 0x00a8: 0x00ac, # DIAERESIS 0x00a9: 0x00a9, # COPYRIGHT SIGN 0x00aa: 0x00bb, # FEMININE ORDINAL INDICATOR 0x00ab: 0x00c7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00c2, # NOT SIGN 0x00ae: 0x00a8, # REGISTERED SIGN 0x00af: 0x00f8, # MACRON 0x00b0: 0x00a1, # DEGREE SIGN 0x00b1: 0x00b1, # PLUS-MINUS SIGN 0x00b4: 0x00ab, # ACUTE ACCENT 0x00b5: 0x00b5, # MICRO SIGN 0x00b6: 0x00a6, # PILCROW SIGN 0x00b7: 0x00e1, # MIDDLE DOT 0x00b8: 0x00fc, # CEDILLA 0x00ba: 0x00bc, # MASCULINE ORDINAL INDICATOR 0x00bb: 0x00c8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bf: 0x00c0, # INVERTED QUESTION MARK 0x00c0: 0x00cb, # LATIN 
CAPITAL LETTER A WITH GRAVE 0x00c1: 0x00e7, # LATIN CAPITAL LETTER A WITH ACUTE 0x00c2: 0x00e5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00c3: 0x00cc, # LATIN CAPITAL LETTER A WITH TILDE 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c5: 0x0081, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c8: 0x00e9, # LATIN CAPITAL LETTER E WITH GRAVE 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE 0x00ca: 0x00e6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00cb: 0x00e8, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00cc: 0x00ed, # LATIN CAPITAL LETTER I WITH GRAVE 0x00cd: 0x00ea, # LATIN CAPITAL LETTER I WITH ACUTE 0x00ce: 0x00eb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00cf: 0x00ec, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE 0x00d2: 0x00f1, # LATIN CAPITAL LETTER O WITH GRAVE 0x00d3: 0x00ee, # LATIN CAPITAL LETTER O WITH ACUTE 0x00d4: 0x00ef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00d5: 0x00cd, # LATIN CAPITAL LETTER O WITH TILDE 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00d9: 0x00f4, # LATIN CAPITAL LETTER U WITH GRAVE 0x00da: 0x00f2, # LATIN CAPITAL LETTER U WITH ACUTE 0x00db: 0x00f3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00df: 0x00a7, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e3: 0x008b, # LATIN SMALL LETTER A WITH TILDE 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e5: 0x008c, # LATIN SMALL LETTER A WITH RING ABOVE 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ec: 0x0093, # LATIN SMALL LETTER I WITH GRAVE 
0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE 0x00f2: 0x0098, # LATIN SMALL LETTER O WITH GRAVE 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f5: 0x009b, # LATIN SMALL LETTER O WITH TILDE 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x00d6, # DIVISION SIGN 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS 0x00ff: 0x00d8, # LATIN SMALL LETTER Y WITH DIAERESIS 0x0102: 0x00ae, # LATIN CAPITAL LETTER A WITH BREVE 0x0103: 0x00be, # LATIN SMALL LETTER A WITH BREVE 0x0131: 0x00f5, # LATIN SMALL LETTER DOTLESS I 0x0152: 0x00ce, # LATIN CAPITAL LIGATURE OE 0x0153: 0x00cf, # LATIN SMALL LIGATURE OE 0x0178: 0x00d9, # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x0192: 0x00c4, # LATIN SMALL LETTER F WITH HOOK 0x0218: 0x00af, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later 0x0219: 0x00bf, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later 0x021a: 0x00de, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later 0x021b: 0x00df, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later 0x02c6: 0x00f6, # MODIFIER LETTER CIRCUMFLEX ACCENT 0x02c7: 0x00ff, # CARON 0x02d8: 0x00f9, # BREVE 0x02d9: 0x00fa, # DOT ABOVE 0x02da: 0x00fb, # RING ABOVE 0x02db: 0x00fe, # OGONEK 0x02dc: 0x00f7, # SMALL TILDE 0x02dd: 0x00fd, # DOUBLE ACUTE ACCENT 0x03a9: 0x00bd, # GREEK CAPITAL LETTER OMEGA 0x03c0: 0x00b9, # GREEK SMALL LETTER PI 0x2013: 0x00d0, # EN DASH 0x2014: 0x00d1, # EM DASH 0x2018: 0x00d4, # LEFT SINGLE QUOTATION MARK 0x2019: 0x00d5, # RIGHT SINGLE QUOTATION MARK 0x201a: 0x00e2, # SINGLE LOW-9 QUOTATION MARK 0x201c: 0x00d2, # LEFT DOUBLE 
QUOTATION MARK 0x201d: 0x00d3, # RIGHT DOUBLE QUOTATION MARK 0x201e: 0x00e3, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x00a0, # DAGGER 0x2021: 0x00e0, # DOUBLE DAGGER 0x2022: 0x00a5, # BULLET 0x2026: 0x00c9, # HORIZONTAL ELLIPSIS 0x2030: 0x00e4, # PER MILLE SIGN 0x2039: 0x00dc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x203a: 0x00dd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x2044: 0x00da, # FRACTION SLASH 0x20ac: 0x00db, # EURO SIGN 0x2122: 0x00aa, # TRADE MARK SIGN 0x2202: 0x00b6, # PARTIAL DIFFERENTIAL 0x2206: 0x00c6, # INCREMENT 0x220f: 0x00b8, # N-ARY PRODUCT 0x2211: 0x00b7, # N-ARY SUMMATION 0x221a: 0x00c3, # SQUARE ROOT 0x221e: 0x00b0, # INFINITY 0x222b: 0x00ba, # INTEGRAL 0x2248: 0x00c5, # ALMOST EQUAL TO 0x2260: 0x00ad, # NOT EQUAL TO 0x2264: 0x00b2, # LESS-THAN OR EQUAL TO 0x2265: 0x00b3, # GREATER-THAN OR EQUAL TO 0x25ca: 0x00d7, # LOZENGE 0xf8ff: 0x00f0, # Apple logo } From lemburg at users.sourceforge.net Fri Oct 21 16:02:32 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Fri, 21 Oct 2005 16:02:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings aliases.py, 1.29, 1.30 Message-ID: <20051021140232.24EB51E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19297 Modified Files: aliases.py Log Message: Alias iso8859_1 to latin_1 which is the same encoding, but has a much faster codec implementation. 
Index: aliases.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/aliases.py,v retrieving revision 1.29 retrieving revision 1.30 diff -u -d -r1.29 -r1.30 --- aliases.py 9 Oct 2005 19:41:19 -0000 1.29 +++ aliases.py 21 Oct 2005 14:02:28 -0000 1.30 @@ -393,11 +393,18 @@ 'cskoi8r' : 'koi8_r', # latin_1 codec + # + # Note that the latin_1 codec is implemented internally in C and a + # lot faster than the charmap codec iso8859_1 which uses the same + # encoding. This is why we discourage the use of the iso8859_1 + # codec and alias it to latin_1 instead. + # '8859' : 'latin_1', 'cp819' : 'latin_1', 'csisolatin1' : 'latin_1', 'ibm819' : 'latin_1', 'iso8859' : 'latin_1', + 'iso8859_1' : 'latin_1', 'iso_8859_1' : 'latin_1', 'iso_8859_1_1987' : 'latin_1', 'iso_ir_100' : 'latin_1', From lemburg at users.sourceforge.net Fri Oct 21 16:35:40 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Fri, 21 Oct 2005 16:35:40 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings cp500.py, 1.4, 1.5 cp1026.py, 1.4, 1.5 cp037.py, 1.4, 1.5 cp875.py, 1.4, 1.5 Message-ID: <20051021143540.EE2291E4218@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29561 Modified Files: cp500.py cp1026.py cp037.py cp875.py Log Message: Replace the old EBCDIC codecs with new ones using the decoding table. Index: cp500.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp500.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp500.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp500.py 21 Oct 2005 14:35:35 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
-(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ [...991 lines suppressed...] + 0x00ee: 0x0056, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0057, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x008c, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00f1: 0x0049, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00cd, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00ce, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00cb, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00cf, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00cc, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00e1, # DIVISION SIGN + 0x00f8: 0x0070, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00dd, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00de, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00db, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00dc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x008d, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x008e, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00ff: 0x00df, # LATIN SMALL LETTER Y WITH DIAERESIS +} \ No newline at end of file Index: cp1026.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1026.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp1026.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp1026.py 21 Oct 2005 14:35:35 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ [...991 lines suppressed...] 
+ 0x00f2: 0x00cd, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00ce, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00cb, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00cf, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00a1, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00e1, # DIVISION SIGN + 0x00f8: 0x0070, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00dd, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00de, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00db, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00e0, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00df, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x005a, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00d0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x005b, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x0079, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x007c, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x006a, # LATIN SMALL LETTER S WITH CEDILLA +} \ No newline at end of file Index: cp037.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp037.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp037.py 8 Aug 2002 20:19:18 -0000 1.4 +++ cp037.py 21 Oct 2005 14:35:35 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ [...991 lines suppressed...] 
+ 0x00ee: 0x0056, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0057, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x008c, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00f1: 0x0049, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x00cd, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00ce, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x00cb, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00cf, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x00cc, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00e1, # DIVISION SIGN + 0x00f8: 0x0070, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x00dd, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00de, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x00db, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x00dc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x008d, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x008e, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00ff: 0x00df, # LATIN SMALL LETTER Y WITH DIAERESIS +} \ No newline at end of file Index: cp875.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp875.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- cp875.py 8 Aug 2002 20:19:19 -0000 1.4 +++ cp875.py 21 Oct 2005 14:35:35 -0000 1.5 @@ -1,9 +1,4 @@ -""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal at lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py. """#" @@ -19,8 +14,8 @@ [...987 lines suppressed...] 
+ 0x03c0: 0x00ad, # GREEK SMALL LETTER PI + 0x03c1: 0x00ae, # GREEK SMALL LETTER RHO + 0x03c2: 0x00ba, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00af, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00bb, # GREEK SMALL LETTER TAU + 0x03c5: 0x00bc, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00bd, # GREEK SMALL LETTER PHI + 0x03c7: 0x00be, # GREEK SMALL LETTER CHI + 0x03c8: 0x00bf, # GREEK SMALL LETTER PSI + 0x03c9: 0x00cb, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00b4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00b8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00b6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00b7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00b9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0x00cf, # HORIZONTAL BAR + 0x2018: 0x00ce, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x00de, # RIGHT SINGLE QUOTATION MARK +} \ No newline at end of file From jhylton at users.sourceforge.net Fri Oct 21 16:58:09 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Fri, 21 Oct 2005 16:58:09 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include compile.h,2.42,2.43 Message-ID: <20051021145809.21FE71E4018@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2903/Include Modified Files: compile.h Log Message: Fix a bunch of imports to use code.h instead of compile.h. 
Remove duplicate declarations from compile.h Index: compile.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/compile.h,v retrieving revision 2.42 retrieving revision 2.43 diff -u -d -r2.42 -r2.43 --- compile.h 20 Oct 2005 19:59:24 -0000 2.42 +++ compile.h 21 Oct 2005 14:58:06 -0000 2.43 @@ -11,11 +11,6 @@ /* Public interface */ struct _node; /* Declare the existence of this type */ PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *); -PyAPI_FUNC(PyCodeObject *) PyCode_New( - int, int, int, int, PyObject *, PyObject *, PyObject *, PyObject *, - PyObject *, PyObject *, PyObject *, PyObject *, int, PyObject *); - /* same as struct above */ -PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); /* Future feature support */ From jhylton at users.sourceforge.net Fri Oct 21 16:58:09 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Fri, 21 Oct 2005 16:58:09 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Objects frameobject.c,2.80,2.81 Message-ID: <20051021145809.5EBC51E4018@bag.python.org> Update of /cvsroot/python/python/dist/src/Objects In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2903/Objects Modified Files: frameobject.c Log Message: Fix a bunch of imports to use code.h instead of compile.h. 
Remove duplicate declarations from compile.h Index: frameobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/frameobject.c,v retrieving revision 2.80 retrieving revision 2.81 diff -u -d -r2.80 -r2.81 --- frameobject.c 20 Oct 2005 19:59:24 -0000 2.80 +++ frameobject.c 21 Oct 2005 14:58:06 -0000 2.81 @@ -4,7 +4,6 @@ #include "Python.h" #include "code.h" -#include "compile.h" #include "frameobject.h" #include "opcode.h" #include "structmember.h" From jhylton at users.sourceforge.net Fri Oct 21 16:58:09 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Fri, 21 Oct 2005 16:58:09 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python bltinmodule.c, 2.328, 2.329 marshal.c, 1.89, 1.90 symtable.c, 2.14, 2.15 Message-ID: <20051021145809.8961A1E403E@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2903/Python Modified Files: bltinmodule.c marshal.c symtable.c Log Message: Fix a bunch of imports to use code.h instead of compile.h. 
Remove duplicate declarations from compile.h Index: bltinmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/bltinmodule.c,v retrieving revision 2.328 retrieving revision 2.329 diff -u -d -r2.328 -r2.329 --- bltinmodule.c 20 Oct 2005 19:59:25 -0000 2.328 +++ bltinmodule.c 21 Oct 2005 14:58:06 -0000 2.329 @@ -4,7 +4,6 @@ #include "node.h" #include "code.h" -#include "compile.h" #include "eval.h" #include Index: marshal.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/marshal.c,v retrieving revision 1.89 retrieving revision 1.90 diff -u -d -r1.89 -r1.90 --- marshal.c 20 Oct 2005 19:59:25 -0000 1.89 +++ marshal.c 21 Oct 2005 14:58:06 -0000 1.90 @@ -7,7 +7,6 @@ #include "Python.h" #include "longintrepr.h" #include "code.h" -#include "compile.h" #include "marshal.h" /* High water mark to determine when the marshalled object is dangerously deep Index: symtable.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/symtable.c,v retrieving revision 2.14 retrieving revision 2.15 diff -u -d -r2.14 -r2.15 --- symtable.c 21 Oct 2005 12:57:31 -0000 2.14 +++ symtable.c 21 Oct 2005 14:58:06 -0000 2.15 @@ -1,7 +1,6 @@ #include "Python.h" #include "Python-ast.h" #include "code.h" -#include "compile.h" #include "symtable.h" #include "structmember.h" From jhylton at users.sourceforge.net Fri Oct 21 16:58:09 2005 From: jhylton at users.sourceforge.net (jhylton@users.sourceforge.net) Date: Fri, 21 Oct 2005 16:58:09 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Modules _hotshot.c, 1.38, 1.39 main.c, 1.85, 1.86 pyexpat.c, 2.91, 2.92 zipimport.c, 1.19, 1.20 Message-ID: <20051021145809.B06991E401F@bag.python.org> Update of /cvsroot/python/python/dist/src/Modules In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2903/Modules Modified Files: _hotshot.c main.c pyexpat.c 
zipimport.c Log Message: Fix a bunch of imports to use code.h instead of compile.h. Remove duplicate declarations from compile.h Index: _hotshot.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_hotshot.c,v retrieving revision 1.38 retrieving revision 1.39 diff -u -d -r1.38 -r1.39 --- _hotshot.c 20 Oct 2005 19:59:24 -0000 1.38 +++ _hotshot.c 21 Oct 2005 14:58:06 -0000 1.39 @@ -4,7 +4,6 @@ #include "Python.h" #include "code.h" -#include "compile.h" #include "eval.h" #include "frameobject.h" #include "structmember.h" Index: main.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/main.c,v retrieving revision 1.85 retrieving revision 1.86 diff -u -d -r1.85 -r1.86 --- main.c 3 Oct 2005 00:54:57 -0000 1.85 +++ main.c 21 Oct 2005 14:58:06 -0000 1.86 @@ -2,7 +2,7 @@ #include "Python.h" #include "osdefs.h" -#include "compile.h" /* For CO_FUTURE_DIVISION */ +#include "code.h" /* For CO_FUTURE_DIVISION */ #include "import.h" #ifdef __VMS Index: pyexpat.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/pyexpat.c,v retrieving revision 2.91 retrieving revision 2.92 diff -u -d -r2.91 -r2.92 --- pyexpat.c 30 Sep 2005 04:46:49 -0000 2.91 +++ pyexpat.c 21 Oct 2005 14:58:06 -0000 2.92 @@ -1,7 +1,6 @@ #include "Python.h" #include -#include "compile.h" #include "frameobject.h" #include "expat.h" Index: zipimport.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/zipimport.c,v retrieving revision 1.19 retrieving revision 1.20 diff -u -d -r1.19 -r1.20 --- zipimport.c 26 Aug 2005 06:42:30 -0000 1.19 +++ zipimport.c 21 Oct 2005 14:58:06 -0000 1.20 @@ -2,7 +2,6 @@ #include "structmember.h" #include "osdefs.h" #include "marshal.h" -#include "compile.h" #include From nascheme at users.sourceforge.net Fri Oct 21 
20:09:22 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Fri, 21 Oct 2005 20:09:22 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.354,2.355 Message-ID: <20051021180922.E92D61E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19270/Python Modified Files: compile.c Log Message: Use <lambda> as the function name for lambdas (matches old compiler). Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.354 retrieving revision 2.355 diff -u -d -r2.354 -r2.355 --- compile.c 21 Oct 2005 12:57:31 -0000 2.354 +++ compile.c 21 Oct 2005 18:09:19 -0000 2.355 @@ -1961,7 +1961,7 @@ arguments_ty args = e->v.Lambda.args; assert(e->kind == Lambda_kind); - name = PyString_InternFromString("lambda"); + name = PyString_InternFromString("<lambda>"); if (!name) return 0; From nascheme at users.sourceforge.net Fri Oct 21 20:11:43 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Fri, 21 Oct 2005 20:11:43 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_repr.py,1.20,1.21 Message-ID: <20051021181143.E1AD21E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20151/Lib/test Modified Files: test_repr.py Log Message: Revert change, func_name of lambda's is back to <lambda>. 
Index: test_repr.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_repr.py,v retrieving revision 1.20 retrieving revision 1.21 diff -u -d -r1.20 -r1.21 --- test_repr.py 20 Oct 2005 19:59:24 -0000 1.20 +++ test_repr.py 21 Oct 2005 18:11:40 -0000 1.21 @@ -123,7 +123,7 @@ def test_lambda(self): self.failUnless(repr(lambda x: x).startswith( - " Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10186/Lib/test Modified Files: test_generators.py Log Message: Revert previous checkin: According to Jeremy, the comment only made sense when the yield was disallowed. Now it's testing that the yield is allowed, so it's not bad and the outer finally is irrelevant. Index: test_generators.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_generators.py,v retrieving revision 1.50 retrieving revision 1.51 diff -u -d -r1.50 -r1.51 --- test_generators.py 21 Oct 2005 04:34:18 -0000 1.50 +++ test_generators.py 22 Oct 2005 03:51:42 -0000 1.51 @@ -774,7 +774,7 @@ ... try: ... 1//0 ... except ZeroDivisionError: -... yield 666 # bad because *outer* try has finally +... yield 666 ... except: ... pass ... finally: From pje at users.sourceforge.net Sat Oct 22 21:07:48 2005 From: pje at users.sourceforge.net (pje@users.sourceforge.net) Date: Sat, 22 Oct 2005 21:07:48 +0200 (CEST) Subject: [Python-checkins] python/nondist/sandbox/setuptools pkg_resources.py, 1.75, 1.76 pkg_resources.txt, 1.18, 1.19 Message-ID: <20051022190748.882F71E4004@bag.python.org> Update of /cvsroot/python/python/nondist/sandbox/setuptools In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23380 Modified Files: pkg_resources.py pkg_resources.txt Log Message: Fixed a problem extracting zipped files on Windows, when the egg in question has had changed contents but still has the same version number. 
Index: pkg_resources.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.py,v retrieving revision 1.75 retrieving revision 1.76 diff -u -d -r1.75 -r1.76 --- pkg_resources.py 18 Oct 2005 04:08:45 -0000 1.75 +++ pkg_resources.py 22 Oct 2005 19:07:44 -0000 1.76 @@ -1071,7 +1071,6 @@ manager, os.path.join(zip_path, name) ) return os.path.dirname(last) # return the extracted directory name - zip_stat = self.zipinfo[zip_path] t,d,size = zip_stat[5], zip_stat[6], zip_stat[3] date_time = ( @@ -1080,21 +1079,18 @@ ) timestamp = time.mktime(date_time) real_path = manager.get_cache_path(self.egg_name, self._parts(zip_path)) - if os.path.isfile(real_path): stat = os.stat(real_path) if stat.st_size==size and stat.st_mtime==timestamp: # size and stamp match, don't bother extracting return real_path - from tempfile import mkstemp outf, tmpnam = mkstemp(".$extract", dir=os.path.dirname(real_path)) os.write(outf, self.loader.get_data(zip_path)) os.close(outf) os.utime(tmpnam, (timestamp,timestamp)) manager.postprocess(tmpnam, real_path) - try: - os.rename(tmpnam, real_path) + try: os.rename(tmpnam, real_path) except os.error: if os.path.isfile(real_path): stat = os.stat(real_path) @@ -1102,6 +1098,10 @@ # size and stamp match, somebody did it just ahead of us # so we're done return real_path + elif os.name=='nt': # Windows, delete old file and retry + os.unlink(real_path) + os.rename(tmpnam, real_path) + return real_path raise return real_path Index: pkg_resources.txt =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.txt,v retrieving revision 1.18 retrieving revision 1.19 diff -u -d -r1.18 -r1.19 --- pkg_resources.txt 18 Oct 2005 04:08:45 -0000 1.18 +++ pkg_resources.txt 22 Oct 2005 19:07:44 -0000 1.19 @@ -1503,6 +1503,9 @@ depender's preferences to override those of a dependee, to prevent conflicts 
when a lower version is acceptable to the dependee, but not the depender. + * Fixed a problem extracting zipped files on Windows, when the egg in question + has had changed contents but still has the same version number. + 0.6a4 * Fix a bug in ``WorkingSet.resolve()`` that was introduced in 0.6a3. From nnorwitz at users.sourceforge.net Sun Oct 23 02:44:06 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 23 Oct 2005 02:44:06 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_genexps.py, 1.7, 1.7.2.1 Message-ID: <20051023004406.CF54B1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24884/Lib/test Modified Files: Tag: release24-maint test_genexps.py Log Message: Backport: SF bug #1167751: fix incorrect code being for generator expressions. Index: test_genexps.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_genexps.py,v retrieving revision 1.7 retrieving revision 1.7.2.1 diff -u -d -r1.7 -r1.7.2.1 --- test_genexps.py 30 Sep 2004 22:29:03 -0000 1.7 +++ test_genexps.py 23 Oct 2005 00:44:03 -0000 1.7.2.1 @@ -82,6 +82,18 @@ ... SyntaxError: invalid syntax +Verify that parenthesis are required when used as a keyword argument value + + >>> dict(a = i for i in xrange(10)) + Traceback (most recent call last): + ... 
+ SyntaxError: invalid syntax + +Verify that parenthesis are required when used as a keyword argument value + + >>> dict(a = (i for i in xrange(10))) #doctest: +ELLIPSIS + {'a': <generator object at ...>} + Verify early binding for the outermost for-expression >>> x=10 From nnorwitz at users.sourceforge.net Sun Oct 23 02:44:06 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 23 Oct 2005 02:44:06 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python graminit.c,2.38,2.38.2.1 Message-ID: <20051023004406.E16481E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24884/Python Modified Files: Tag: release24-maint graminit.c Log Message: Backport: SF bug #1167751: fix incorrect code being for generator expressions. Index: graminit.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/graminit.c,v retrieving revision 2.38 retrieving revision 2.38.2.1 diff -u -d -r2.38 -r2.38.2.1 --- graminit.c 31 Aug 2004 10:07:13 -0000 2.38 +++ graminit.c 23 Oct 2005 00:44:03 -0000 2.38.2.1 @@ -1496,26 +1496,34 @@ {26, 1}, }; static arc arcs_69_1[3] = { - {25, 2}, - {147, 3}, + {147, 2}, + {25, 3}, {0, 1}, }; static arc arcs_69_2[1] = { - {26, 4}, + {0, 2}, }; static arc arcs_69_3[1] = { - {0, 3}, + {26, 4}, }; static arc arcs_69_4[2] = { - {147, 3}, + {13, 5}, {0, 4}, }; -static state states_69[5] = { +static arc arcs_69_5[1] = { + {147, 6}, +}; +static arc arcs_69_6[1] = { + {15, 2}, +}; +static state states_69[7] = { {1, arcs_69_0}, {3, arcs_69_1}, {1, arcs_69_2}, {1, arcs_69_3}, {2, arcs_69_4}, + {1, arcs_69_5}, + {1, arcs_69_6}, }; static arc arcs_70_0[2] = { {146, 1}, @@ -1791,7 +1799,7 @@ "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\002"}, {324, "arglist", 0, 8, states_68, "\000\040\010\060\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, - {325, "argument", 0, 5, 
states_69, + {325, "argument", 0, 7, states_69, "\000\040\010\000\000\000\000\000\000\000\000\000\000\002\000\140\010\111\023\000"}, {326, "list_iter", 0, 2, states_70, "\000\000\000\000\000\000\000\000\000\000\000\042\000\000\000\000\000\000\000\000"}, From nnorwitz at users.sourceforge.net Sun Oct 23 02:44:06 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 23 Oct 2005 02:44:06 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Grammar Grammar,1.52,1.52.2.1 Message-ID: <20051023004406.E1F2E1E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Grammar In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24884/Grammar Modified Files: Tag: release24-maint Grammar Log Message: Backport: SF bug #1167751: fix incorrect code being for generator expressions. Index: Grammar =================================================================== RCS file: /cvsroot/python/python/dist/src/Grammar/Grammar,v retrieving revision 1.52 retrieving revision 1.52.2.1 diff -u -d -r1.52 -r1.52.2.1 --- Grammar 31 Aug 2004 10:07:00 -0000 1.52 +++ Grammar 23 Oct 2005 00:44:03 -0000 1.52.2.1 @@ -102,7 +102,7 @@ classdef: 'class' NAME ['(' testlist ')'] ':' suite arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) -argument: [test '='] test [gen_for] # Really [keyword '='] test +argument: test [gen_for] | test '=' test ['(' gen_for ')'] # Really [keyword '='] test list_iter: list_for | list_if list_for: 'for' exprlist 'in' testlist_safe [list_iter] From nnorwitz at users.sourceforge.net Sun Oct 23 02:44:07 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 23 Oct 2005 02:44:07 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS, 1.1193.2.121, 1.1193.2.122 Message-ID: <20051023004407.5A2D11E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24884/Misc Modified Files: Tag: release24-maint 
NEWS Log Message: Backport: SF bug #1167751: fix incorrect code being for generator expressions. Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1193.2.121 retrieving revision 1.1193.2.122 diff -u -d -r1.1193.2.121 -r1.1193.2.122 --- NEWS 15 Oct 2005 16:44:57 -0000 1.1193.2.121 +++ NEWS 23 Oct 2005 00:44:03 -0000 1.1193.2.122 @@ -12,6 +12,9 @@ Core and builtins ----------------- +- SF bug #1167751: fix incorrect code being for generator expressions. + The following code now raises a SyntaxError: foo(a = i for i in range(10)) + - SF Bug #976608: fix SystemError when mtime of an imported file is -1. - SF Bug #887946: fix segfault when redirecting stdin from a directory. From nascheme at users.sourceforge.net Sun Oct 23 05:38:23 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 05:38:23 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c,2.3,2.4 Message-ID: <20051023033823.21A471E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21829/Python Modified Files: ast.c Log Message: Add an assert. 
Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ast.c,v retrieving revision 2.3 retrieving revision 2.4 diff -u -d -r2.3 -r2.4 --- ast.c 21 Oct 2005 12:57:31 -0000 2.3 +++ ast.c 23 Oct 2005 03:38:19 -0000 2.4 @@ -2178,6 +2178,7 @@ n = CHILD(n, 0); if (STR(CHILD(n, 0))[0] == 'i') { /* import */ n = CHILD(n, 1); + REQ(n, dotted_as_names); aliases = asdl_seq_new((NCH(n) + 1) / 2); if (!aliases) return NULL; From nascheme at users.sourceforge.net Sun Oct 23 05:45:46 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 05:45:46 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.355,2.356 Message-ID: <20051023034546.732C21E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22746/Python Modified Files: compile.c Log Message: Don't stop generating code for import statements after the first "import as" part. Fixes one bug from #1333982. 
Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.355 retrieving revision 2.356 diff -u -d -r2.355 -r2.356 --- compile.c 21 Oct 2005 18:09:19 -0000 2.355 +++ compile.c 23 Oct 2005 03:45:42 -0000 2.356 @@ -2359,8 +2359,9 @@ ADDOP_NAME(c, IMPORT_NAME, alias->name, names); if (alias->asname) { - return compiler_import_as(c, - alias->name, alias->asname); + r = compiler_import_as(c, alias->name, alias->asname); + if (!r) + return r; } else { identifier tmp = alias->name; From nascheme at users.sourceforge.net Sun Oct 23 06:24:44 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 06:24:44 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_scope.py, 1.28, 1.29 Message-ID: <20051023042444.E44971E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28445/Lib/test Modified Files: test_scope.py Log Message: Fix arigo's funky LOAD_NAME bug: implicit globals inside classes have historically been looked up using LOAD_NAME, not LOAD_GLOBAL. looked up by LOAD_NAME, not Index: test_scope.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_scope.py,v retrieving revision 1.28 retrieving revision 1.29 diff -u -d -r1.28 -r1.29 --- test_scope.py 20 Oct 2005 19:59:24 -0000 1.28 +++ test_scope.py 23 Oct 2005 04:24:39 -0000 1.29 @@ -440,6 +440,15 @@ x = -1 vereq(test(3)(2), 5) +looked_up_by_load_name = False +class X: + # Implicit globals inside classes are be looked up by LOAD_NAME, not + # LOAD_GLOBAL. + locals()['looked_up_by_load_name'] = True + passed = looked_up_by_load_name + +verify(X.passed) + print "18. 
verify that locals() works" def f(x): From nascheme at users.sourceforge.net Sun Oct 23 06:24:53 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 06:24:53 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.356,2.357 Message-ID: <20051023042453.A84311E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28480/Python Modified Files: compile.c Log Message: Fix arigo's funky LOAD_NAME bug: implicit globals inside classes have historically been looked up using LOAD_NAME, not LOAD_GLOBAL. looked up by LOAD_NAME, not Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.356 retrieving revision 2.357 diff -u -d -r2.356 -r2.357 --- compile.c 23 Oct 2005 03:45:42 -0000 2.356 +++ compile.c 23 Oct 2005 04:24:49 -0000 2.357 @@ -2731,7 +2731,8 @@ optype = OP_FAST; break; case GLOBAL_IMPLICIT: - if (!c->u->u_ste->ste_unoptimized) + if (c->u->u_ste->ste_type == FunctionBlock && + !c->u->u_ste->ste_unoptimized) optype = OP_GLOBAL; break; case GLOBAL_EXPLICIT: From rhettinger at users.sourceforge.net Sun Oct 23 06:47:17 2005 From: rhettinger at users.sourceforge.net (rhettinger@users.sourceforge.net) Date: Sun, 23 Oct 2005 06:47:17 +0200 (CEST) Subject: [Python-checkins] python/dist/src/PC/VC6 pythoncore.dsp,1.18,1.19 Message-ID: <20051023044717.E9C4E1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/PC/VC6 In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1402 Modified Files: pythoncore.dsp Log Message: Add AST files to VC6 build. 
Index: pythoncore.dsp =================================================================== RCS file: /cvsroot/python/python/dist/src/PC/VC6/pythoncore.dsp,v retrieving revision 1.18 retrieving revision 1.19 diff -u -d -r1.18 -r1.19 --- pythoncore.dsp 24 Aug 2005 00:28:21 -0000 1.18 +++ pythoncore.dsp 23 Oct 2005 04:47:13 -0000 1.19 @@ -173,6 +173,14 @@ # End Source File # Begin Source File +SOURCE=..\..\Python\asdl.c +# End Source File +# Begin Source File + +SOURCE=..\..\Python\ast.c +# End Source File +# Begin Source File + SOURCE=..\..\Modules\audioop.c # End Source File # Begin Source File @@ -221,6 +229,10 @@ # End Source File # Begin Source File +SOURCE=..\..\Objects\codeobject.c +# End Source File +# Begin Source File + SOURCE=..\..\Modules\collectionsmodule.c # End Source File # Begin Source File @@ -511,6 +523,10 @@ # End Source File # Begin Source File +SOURCE=..\..\Python\Python-ast.c +# End Source File +# Begin Source File + SOURCE=..\..\Python\pythonrun.c # End Source File # Begin Source File From mhammond at users.sourceforge.net Sun Oct 23 12:50:08 2005 From: mhammond at users.sourceforge.net (mhammond@users.sourceforge.net) Date: Sun, 23 Oct 2005 12:50:08 +0200 (CEST) Subject: [Python-checkins] python/dist/src/PCbuild pythoncore.vcproj, 1.30, 1.31 Message-ID: <20051023105008.DF5B81E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/PCbuild In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27016 Modified Files: pythoncore.vcproj Log Message: Remove conflict markers and add parsermodule.c to get things building Index: pythoncore.vcproj =================================================================== RCS file: /cvsroot/python/python/dist/src/PCbuild/pythoncore.vcproj,v retrieving revision 1.30 retrieving revision 1.31 diff -u -d -r1.30 -r1.31 --- pythoncore.vcproj 20 Oct 2005 19:59:24 -0000 1.30 +++ pythoncore.vcproj 23 Oct 2005 10:50:04 -0000 1.31 @@ -815,6 +815,9 @@ RelativePath="..\Modules\collectionsmodule.c"> + + @@ -2318,9 
+2321,6 @@ - - @@ -2456,6 +2456,9 @@ + + @@ -2759,7 +2762,7 @@ + RelativePath="..\Modules\sha256module.c"> + RelativePath="..\Modules\sha512module.c"> + RelativePath="..\Modules\sha512module.c"> -======= - RelativePath="..\Modules\sha256module.c"> ->>>>>>> 1.26.2.3 + RelativePath="..\Modules\shamodule.c"> + RelativePath="..\Modules\signalmodule.c"> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27527 Modified Files: pythonrun.h Log Message: Correct error to PyRun_SimpleString macro introduced in AST merge. Index: pythonrun.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/pythonrun.h,v retrieving revision 2.68 retrieving revision 2.69 diff -u -d -r2.68 -r2.69 --- pythonrun.h 20 Oct 2005 19:59:24 -0000 2.68 +++ pythonrun.h 23 Oct 2005 10:53:06 -0000 2.69 @@ -82,7 +82,7 @@ PyRun_AnyFileExFlags(fp, name, closeit, NULL) #define PyRun_AnyFileFlags(fp, name, flags) \ PyRun_AnyFileExFlags(fp, name, 0, flags) -#define PyRun_SimpleString(s, f) PyRunSimpleStringFlags(s, f, NULL) +#define PyRun_SimpleString(s) PyRun_SimpleStringFlags(s, NULL) #define PyRun_SimpleFile(f, p) PyRun_SimpleFileExFlags(f, p, 0, NULL) #define PyRun_SimpleFileEx(f, p, c) PyRun_SimpleFileExFlags(f, p, c, NULL) #define PyRun_InteractiveOne(f, p) PyRun_InteractiveOneFlags(f, p, NULL) From lemburg at users.sourceforge.net Sun Oct 23 15:43:44 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Sun, 23 Oct 2005 15:43:44 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Misc NEWS,1.1393,1.1394 Message-ID: <20051023134344.4FBF31E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Misc In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24032/Misc Modified Files: NEWS Log Message: Add news item for the new charmap codecs. 
Index: NEWS =================================================================== RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v retrieving revision 1.1393 retrieving revision 1.1394 diff -u -d -r1.1393 -r1.1394 --- NEWS 21 Oct 2005 06:32:02 -0000 1.1393 +++ NEWS 23 Oct 2005 13:43:40 -0000 1.1394 @@ -12,6 +12,14 @@ Core and builtins ----------------- +- Replaced most Unicode charmap codecs with new ones using the + new Unicode translate string feature in the builtin charmap + codec; the codecs were created from the mapping tables available + at ftp.unicode.org and contain a few updates (e.g. the Mac OS + encodings now include a mapping for the Apple logo) + +- Added a few more codecs for Mac OS encodings + - Speed up some Unicode operations. - A new AST parser implementation was completed. From nascheme at users.sourceforge.net Sun Oct 23 19:21:58 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 19:21:58 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.357,2.358 Message-ID: <20051023172158.05C821E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5037/Python Modified Files: compile.c Log Message: Use PyTuple_Pack instead of Py_BuildValue. 
Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.357 retrieving revision 2.358 diff -u -d -r2.357 -r2.358 --- compile.c 23 Oct 2005 04:24:49 -0000 2.357 +++ compile.c 23 Oct 2005 17:21:54 -0000 2.358 @@ -1489,8 +1489,7 @@ int arg; /* necessary to make sure types aren't coerced (e.g., int and long) */ - /* XXX should use: t = PyTuple_Pack(2, o, o->ob_type); */ - t = Py_BuildValue("(OO)", o, o->ob_type); + t = PyTuple_Pack(2, o, o->ob_type); if (t == NULL) return -1; From nascheme at users.sourceforge.net Sun Oct 23 20:37:20 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 20:37:20 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include symtable.h,2.15,2.16 Message-ID: <20051023183720.261191E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19669/Include Modified Files: symtable.h Log Message: Fix private name mangling. The symtable also must do mangles so that the scope of names can be correctly determined. 
Index: symtable.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/symtable.h,v retrieving revision 2.15 retrieving revision 2.16 diff -u -d -r2.15 -r2.16 --- symtable.h 21 Oct 2005 04:19:49 -0000 2.15 +++ symtable.h 23 Oct 2005 18:37:16 -0000 2.16 @@ -17,7 +17,7 @@ PyObject *st_stack; /* stack of namespace info */ PyObject *st_global; /* borrowed ref to MODULE in st_symbols */ int st_nblocks; /* number of blocks */ - char *st_private; /* name of current class or NULL */ + PyObject *st_private; /* name of current class or NULL */ int st_tmpname; /* temporary name counter */ PyFutureFeatures *st_future; /* module's future features */ }; From nascheme at users.sourceforge.net Sun Oct 23 20:37:32 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 20:37:32 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.358,2.359 Message-ID: <20051023183732.8D6781E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19748/Python Modified Files: compile.c Log Message: Fix private name mangling. The symtable also must do mangles so that the scope of names can be correctly determined. Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.358 retrieving revision 2.359 diff -u -d -r2.358 -r2.359 --- compile.c 23 Oct 2005 17:21:54 -0000 2.358 +++ compile.c 23 Oct 2005 18:37:27 -0000 2.359 @@ -2701,10 +2701,11 @@ static int compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx) { - int op, scope; + int op, scope, r, arg; enum { OP_FAST, OP_GLOBAL, OP_DEREF, OP_NAME } optype; PyObject *dict = c->u->u_names; + PyObject *mangled; /* XXX AugStore isn't used anywhere! */ /* First check for assignment to __debug__. Param? 
*/ @@ -2713,9 +2714,13 @@ return compiler_error(c, "can not assign to __debug__"); } + mangled = _Py_Mangle(c->u->u_private, name); + if (!mangled) + return 0; + op = 0; optype = OP_NAME; - scope = PyST_GetScope(c->u->u_ste, name); + scope = PyST_GetScope(c->u->u_ste, mangled); switch (scope) { case FREE: dict = c->u->u_freevars; @@ -2755,6 +2760,7 @@ "can not delete variable '%s' referenced " "in nested scope", PyString_AS_STRING(name)); + Py_DECREF(mangled); return 0; break; case Param: @@ -2772,7 +2778,8 @@ case Param: assert(0); /* impossible */ } - ADDOP_O(c, op, name, varnames); + ADDOP_O(c, op, mangled, varnames); + Py_DECREF(mangled); return 1; case OP_GLOBAL: switch (ctx) { @@ -2801,7 +2808,12 @@ } assert(op); - return compiler_addop_name(c, op, dict, name); + arg = compiler_add_o(c, dict, mangled); + if (arg < 0) + return 0; + r = compiler_addop_i(c, op, arg); + Py_DECREF(mangled); + return r; } static int From nascheme at users.sourceforge.net Sun Oct 23 20:37:45 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 20:37:45 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python symtable.c,2.15,2.16 Message-ID: <20051023183745.87B481E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19848/Python Modified Files: symtable.c Log Message: Fix private name mangling. The symtable also must do mangles so that the scope of names can be correctly determined. 
Index: symtable.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/symtable.c,v retrieving revision 2.15 retrieving revision 2.16 diff -u -d -r2.15 -r2.16 --- symtable.c 21 Oct 2005 14:58:06 -0000 2.15 +++ symtable.c 23 Oct 2005 18:37:42 -0000 2.16 @@ -740,8 +740,11 @@ symtable_lookup(struct symtable *st, PyObject *name) { PyObject *o; - - o = PyDict_GetItem(st->st_cur->ste_symbols, name); + PyObject *mangled = _Py_Mangle(st->st_private, name); + if (!mangled) + return 0; + o = PyDict_GetItem(st->st_cur->ste_symbols, mangled); + Py_DECREF(mangled); if (!o) return 0; return PyInt_AsLong(o); @@ -753,49 +756,57 @@ PyObject *o; PyObject *dict; int val; + PyObject *mangled = _Py_Mangle(st->st_private, name); + if (!mangled) + return 0; dict = st->st_cur->ste_symbols; - if ((o = PyDict_GetItem(dict, name))) { + if ((o = PyDict_GetItem(dict, mangled))) { val = PyInt_AS_LONG(o); if ((flag & DEF_PARAM) && (val & DEF_PARAM)) { + /* Is it better to use 'mangled' or 'name' here? 
*/ PyErr_Format(PyExc_SyntaxError, DUPLICATE_ARGUMENT, PyString_AsString(name)); PyErr_SyntaxLocation(st->st_filename, st->st_cur->ste_lineno); - return 0; + goto error; } val |= flag; } else val = flag; o = PyInt_FromLong(val); if (o == NULL) - return 0; - if (PyDict_SetItem(dict, name, o) < 0) { + goto error; + if (PyDict_SetItem(dict, mangled, o) < 0) { Py_DECREF(o); - return 0; + goto error; } Py_DECREF(o); if (flag & DEF_PARAM) { - if (PyList_Append(st->st_cur->ste_varnames, name) < 0) - return 0; + if (PyList_Append(st->st_cur->ste_varnames, mangled) < 0) + goto error; } else if (flag & DEF_GLOBAL) { /* XXX need to update DEF_GLOBAL for other flags too; perhaps only DEF_FREE_GLOBAL */ val = flag; - if ((o = PyDict_GetItem(st->st_global, name))) { + if ((o = PyDict_GetItem(st->st_global, mangled))) { val |= PyInt_AS_LONG(o); } o = PyInt_FromLong(val); if (o == NULL) - return 0; - if (PyDict_SetItem(st->st_global, name, o) < 0) { + goto error; + if (PyDict_SetItem(st->st_global, mangled, o) < 0) { Py_DECREF(o); - return 0; + goto error; } Py_DECREF(o); } return 1; + +error: + Py_DECREF(mangled); + return 0; } /* VISIT, VISIT_SEQ and VIST_SEQ_TAIL take an ASDL type as their second argument. 
@@ -849,17 +860,22 @@ if (!symtable_exit_block(st, s)) return 0; break; - case ClassDef_kind: + case ClassDef_kind: { + PyObject *tmp; if (!symtable_add_def(st, s->v.ClassDef.name, DEF_LOCAL)) return 0; VISIT_SEQ(st, expr, s->v.ClassDef.bases); if (!symtable_enter_block(st, s->v.ClassDef.name, ClassBlock, (void *)s, s->lineno)) return 0; + tmp = st->st_private; + st->st_private = s->v.ClassDef.name; VISIT_SEQ(st, stmt, s->v.ClassDef.body); + st->st_private = tmp; if (!symtable_exit_block(st, s)) return 0; break; + } case Return_kind: if (s->v.Return.value) VISIT(st, expr, s->v.Return.value); From nascheme at users.sourceforge.net Sun Oct 23 20:50:39 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 20:50:39 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python symtable.c,2.16,2.17 Message-ID: <20051023185039.527E41E402C@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22138/Python Modified Files: symtable.c Log Message: Fix check_unoptimized() function. The only optimized namespaces are in function blocks. This eliminates spurious warnings about "import *" and related statements at the class level.
Index: symtable.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/symtable.c,v retrieving revision 2.16 retrieving revision 2.17 diff -u -d -r2.16 -r2.17 --- symtable.c 23 Oct 2005 18:37:42 -0000 2.16 +++ symtable.c 23 Oct 2005 18:50:36 -0000 2.17 @@ -445,7 +445,7 @@ char buf[300]; const char* trailer; - if (ste->ste_type == ModuleBlock || !ste->ste_unoptimized + if (ste->ste_type != FunctionBlock || !ste->ste_unoptimized || !(ste->ste_free || ste->ste_child_free)) return 1; From nascheme at users.sourceforge.net Sun Oct 23 20:52:39 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Sun, 23 Oct 2005 20:52:39 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.359,2.360 Message-ID: <20051023185239.610E41E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22498/Python Modified Files: compile.c Log Message: Remove unnecessary local variable. 
Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.359 retrieving revision 2.360 diff -u -d -r2.359 -r2.360 --- compile.c 23 Oct 2005 18:37:27 -0000 2.359 +++ compile.c 23 Oct 2005 18:52:36 -0000 2.360 @@ -2701,7 +2701,7 @@ static int compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx) { - int op, scope, r, arg; + int op, scope, arg; enum { OP_FAST, OP_GLOBAL, OP_DEREF, OP_NAME } optype; PyObject *dict = c->u->u_names; @@ -2811,9 +2811,8 @@ arg = compiler_add_o(c, dict, mangled); if (arg < 0) return 0; - r = compiler_addop_i(c, op, arg); Py_DECREF(mangled); - return r; + return compiler_addop_i(c, op, arg); } static int From nnorwitz at users.sourceforge.net Sun Oct 23 20:59:20 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 23 Oct 2005 20:59:20 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Parser asdl_c.py,2.1,2.2 Message-ID: <20051023185920.6B6B21E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Parser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23703/Parser Modified Files: asdl_c.py Log Message: Use PyErr_NoMemory() instead of rolling our own. Get rid of "int i" unused warnings from Python-ast.c which we are generating. 
Index: asdl_c.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Parser/asdl_c.py,v retrieving revision 2.1 retrieving revision 2.2 diff -u -d -r2.1 -r2.2 --- asdl_c.py 20 Oct 2005 19:59:24 -0000 2.1 +++ asdl_c.py 23 Oct 2005 18:59:17 -0000 2.2 @@ -282,7 +282,7 @@ emit("p = (%s)malloc(sizeof(*p));" % ctype, 1) emit("if (!p) {", 1) - emit("PyErr_SetString(PyExc_MemoryError, \"no memory\");", 2) + emit("PyErr_NoMemory();", 2) emit("return NULL;", 2) emit("}", 1) if union: @@ -491,9 +491,8 @@ self.emit("marshal_write_%s(PyObject **buf, int *off, %s o)" % (name, ctype), 0) self.emit("{", 0) - # XXX: add declaration of "int i;" properly - if has_seq or True: - self.emit("int i;", 1) # XXX only need it for sequences + if has_seq: + self.emit("int i;", 1) def func_end(self): self.emit("return 1;", 1) @@ -501,8 +500,7 @@ self.emit("", 0) def visitSum(self, sum, name): - has_seq = has_sequence(sum.types, False) - self.func_begin(name, has_seq) + self.func_begin(name, has_sequence(sum.types, False)) simple = is_simple(sum) if simple: self.emit("switch (o) {", 1) @@ -515,7 +513,7 @@ self.func_end() def visitProduct(self, prod, name): - self.func_begin(name, find_sequence(prod.fields, True)) + self.func_begin(name, find_sequence(prod.fields, False)) for field in prod.fields: self.visitField(field, name, 1, 1) self.func_end() From nnorwitz at users.sourceforge.net Sun Oct 23 21:06:05 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 23 Oct 2005 21:06:05 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include compile.h,2.43,2.44 Message-ID: <20051023190605.4C1F71E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25576/Include Modified Files: compile.h Log Message: use PyAPI_FUNC instead of DL_IMPORT. are we going to deprecate the old non-Py PREFIXED macros, etc? 
Index: compile.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/compile.h,v retrieving revision 2.43 retrieving revision 2.44 diff -u -d -r2.43 -r2.44 --- compile.h 21 Oct 2005 14:58:06 -0000 2.43 +++ compile.h 23 Oct 2005 19:06:02 -0000 2.44 @@ -24,9 +24,9 @@ #define FUTURE_DIVISION "division" struct _mod; /* Declare the existence of this type */ -DL_IMPORT(PyCodeObject *) PyAST_Compile(struct _mod *, const char *, +PyAPI_FUNC(PyCodeObject *) PyAST_Compile(struct _mod *, const char *, PyCompilerFlags *); -DL_IMPORT(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *); +PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *); #define ERR_LATE_FUTURE \ "from __future__ imports must occur at the beginning of the file" From nnorwitz at users.sourceforge.net Sun Oct 23 21:22:52 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Sun, 23 Oct 2005 21:22:52 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python Python-ast.c,2.1,2.2 Message-ID: <20051023192252.C7D081E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29849/Python Modified Files: Python-ast.c Log Message: Use PyErr_NoMemory() instead of rolling our own. Get rid of "int i" unused warnings from Python-ast.c which we are generating. 
Index: Python-ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/Python-ast.c,v retrieving revision 2.1 retrieving revision 2.2 diff -u -d -r2.1 -r2.2 --- Python-ast.c 20 Oct 2005 19:59:25 -0000 2.1 +++ Python-ast.c 23 Oct 2005 19:22:48 -0000 2.2 @@ -1,4 +1,4 @@ -/* File automatically generated by ../Parser/asdl_c.py */ +/* File automatically generated by ./Parser/asdl_c.py */ #include "Python.h" #include "Python-ast.h" @@ -9,7 +9,7 @@ mod_ty p; p = (mod_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Module_kind; @@ -23,7 +23,7 @@ mod_ty p; p = (mod_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Interactive_kind; @@ -42,7 +42,7 @@ } p = (mod_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Expression_kind; @@ -56,7 +56,7 @@ mod_ty p; p = (mod_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Suite_kind; @@ -81,7 +81,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = FunctionDef_kind; @@ -104,7 +104,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = ClassDef_kind; @@ -121,7 +121,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Return_kind; @@ -136,7 +136,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Delete_kind; @@ -156,7 +156,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - 
PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Assign_kind; @@ -187,7 +187,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = AugAssign_kind; @@ -204,7 +204,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Print_kind; @@ -232,7 +232,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = For_kind; @@ -255,7 +255,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = While_kind; @@ -277,7 +277,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = If_kind; @@ -294,7 +294,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Raise_kind; @@ -311,7 +311,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = TryExcept_kind; @@ -328,7 +328,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = TryFinally_kind; @@ -349,7 +349,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Assert_kind; @@ -365,7 +365,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Import_kind; @@ -385,7 +385,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no 
memory"); + PyErr_NoMemory(); return NULL; } p->kind = ImportFrom_kind; @@ -406,7 +406,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Exec_kind; @@ -423,7 +423,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Global_kind; @@ -443,7 +443,7 @@ } p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Expr_kind; @@ -458,7 +458,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Pass_kind; @@ -472,7 +472,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Break_kind; @@ -486,7 +486,7 @@ stmt_ty p; p = (stmt_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Continue_kind; @@ -505,7 +505,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = BoolOp_kind; @@ -536,7 +536,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = BinOp_kind; @@ -563,7 +563,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = UnaryOp_kind; @@ -589,7 +589,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Lambda_kind; @@ -605,7 +605,7 @@ expr_ty p; p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return 
NULL; } p->kind = Dict_kind; @@ -626,7 +626,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = ListComp_kind; @@ -647,7 +647,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = GeneratorExp_kind; @@ -663,7 +663,7 @@ expr_ty p; p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Yield_kind; @@ -683,7 +683,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Compare_kind; @@ -706,7 +706,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Call_kind; @@ -730,7 +730,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Repr_kind; @@ -750,7 +750,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Num_kind; @@ -770,7 +770,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Str_kind; @@ -800,7 +800,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Attribute_kind; @@ -832,7 +832,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Subscript_kind; @@ -859,7 +859,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Name_kind; @@ -880,7 +880,7 @@ } p = 
(expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = List_kind; @@ -901,7 +901,7 @@ } p = (expr_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Tuple_kind; @@ -917,7 +917,7 @@ slice_ty p; p = (slice_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Ellipsis_kind; @@ -930,7 +930,7 @@ slice_ty p; p = (slice_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Slice_kind; @@ -946,7 +946,7 @@ slice_ty p; p = (slice_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = ExtSlice_kind; @@ -965,7 +965,7 @@ } p = (slice_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->kind = Index_kind; @@ -989,7 +989,7 @@ } p = (comprehension_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->target = target; @@ -1004,7 +1004,7 @@ excepthandler_ty p; p = (excepthandler_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->type = type; @@ -1020,7 +1020,7 @@ arguments_ty p; p = (arguments_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->args = args; @@ -1046,7 +1046,7 @@ } p = (keyword_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->arg = arg; @@ -1065,7 +1065,7 @@ } p = (alias_ty)malloc(sizeof(*p)); if (!p) { - PyErr_SetString(PyExc_MemoryError, "no memory"); + PyErr_NoMemory(); return NULL; } p->name = name; @@ -1992,7 +1992,6 @@ int 
marshal_write_expr_context(PyObject **buf, int *off, expr_context_ty o) { - int i; switch (o) { case Load: marshal_write_int(buf, off, 1); @@ -2067,7 +2066,6 @@ int marshal_write_boolop(PyObject **buf, int *off, boolop_ty o) { - int i; switch (o) { case And: marshal_write_int(buf, off, 1); @@ -2082,7 +2080,6 @@ int marshal_write_operator(PyObject **buf, int *off, operator_ty o) { - int i; switch (o) { case Add: marshal_write_int(buf, off, 1); @@ -2127,7 +2124,6 @@ int marshal_write_unaryop(PyObject **buf, int *off, unaryop_ty o) { - int i; switch (o) { case Invert: marshal_write_int(buf, off, 1); @@ -2148,7 +2144,6 @@ int marshal_write_cmpop(PyObject **buf, int *off, cmpop_ty o) { - int i; switch (o) { case Eq: marshal_write_int(buf, off, 1); @@ -2258,7 +2253,6 @@ int marshal_write_keyword(PyObject **buf, int *off, keyword_ty o) { - int i; marshal_write_identifier(buf, off, o->arg); marshal_write_expr(buf, off, o->value); return 1; @@ -2267,7 +2261,6 @@ int marshal_write_alias(PyObject **buf, int *off, alias_ty o) { - int i; marshal_write_identifier(buf, off, o->name); if (o->asname) { marshal_write_int(buf, off, 1); From akuchling at users.sourceforge.net Sun Oct 23 23:49:36 2005 From: akuchling at users.sourceforge.net (akuchling@users.sourceforge.net) Date: Sun, 23 Oct 2005 23:49:36 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/whatsnew Makefile,NONE,1.1 Message-ID: <20051023214936.5C3F41E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/whatsnew In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26920 Added Files: Makefile Log Message: Add convenience makefile to check the latest what's new. 
The invocation of Python is MacOS-specific; not sure how to make it platform independent (but maybe it doesn't matter) --- NEW FILE: Makefile --- check: ../../python.exe ../../Tools/scripts/texcheck.py whatsnew25.tex From akuchling at users.sourceforge.net Sun Oct 23 23:53:02 2005 From: akuchling at users.sourceforge.net (akuchling@users.sourceforge.net) Date: Sun, 23 Oct 2005 23:53:02 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Doc/whatsnew whatsnew25.tex, 1.20, 1.21 Message-ID: <20051023215302.ACF921E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Doc/whatsnew In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27568 Modified Files: whatsnew25.tex Log Message: Add paragraphs on AST branch. AST'ers, please suggest corrections Index: whatsnew25.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/whatsnew/whatsnew25.tex,v retrieving revision 1.20 retrieving revision 1.21 diff -u -d -r1.20 -r1.21 --- whatsnew25.tex 29 Aug 2005 13:30:12 -0000 1.20 +++ whatsnew25.tex 23 Oct 2005 21:52:59 -0000 1.21 @@ -447,6 +447,31 @@ \begin{itemize} +\item The design of the bytecode compiler has changed a great deal, no +longer generating bytecode by traversing the parse tree. Instead +the parse tree is converted to an abstract syntax tree (or AST), and it is +the abstract syntax tree that's traversed to produce the bytecode. + +No documentation has been written for the AST code yet. To start +learning about it, read the definition of the various AST nodes in +\file{Parser/Python.asdl}. A Python script reads this file and +generates a set of C structure definitions in +\file{Include/Python-ast.h}. The \cfunction{PyParser_ASTFromString()} +and \cfunction{PyParser_ASTFromFile()}, defined in +\file{Include/pythonrun.h}, take Python source as input and return the +root of an AST representing the contents. This AST can then be turned +into a code object by \cfunction{PyAST_Compile()}. 
For more +information, read the source code, and then ask questions on +python-dev. + +% List of names taken from Jeremy's python-dev post at +% http://mail.python.org/pipermail/python-dev/2005-October/057500.html +The AST code was developed under Jeremy Hylton's management, and +implemented by (in alphabetical order) Brett Cannon, Nick Coghlan, +Grant Edwards, John Ehresman, Kurt Kaiser, Neal Norwitz, Tim Peters, +Armin Rigo, and Neil Schemenauer, plus the participants in a number of +AST sprints at conferences such as PyCon. + \item The built-in set types now have an official C API. Call \cfunction{PySet_New()} and \cfunction{PyFrozenSet_New()} to create a new set, \cfunction{PySet_Add()} and \cfunction{PySet_Discard()} to From vsajip at users.sourceforge.net Mon Oct 24 00:33:02 2005 From: vsajip at users.sourceforge.net (vsajip@users.sourceforge.net) Date: Mon, 24 Oct 2005 00:33:02 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging __init__.py, 1.33, 1.34 Message-ID: <20051023223302.F1D1C1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5283 Modified Files: __init__.py Log Message: One-off "No handlers..." error message only raised if raiseExceptions is set. 
Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/__init__.py,v retrieving revision 1.33 retrieving revision 1.34 diff -u -d -r1.33 -r1.34 --- __init__.py 21 Oct 2005 06:00:24 -0000 1.33 +++ __init__.py 23 Oct 2005 22:32:59 -0000 1.34 @@ -1123,7 +1123,7 @@ c = None #break out else: c = c.parent - if (found == 0) and not self.manager.emittedNoHandlerWarning: + if (found == 0) and raiseExceptions and not self.manager.emittedNoHandlerWarning: sys.stderr.write("No handlers could be found for logger" " \"%s\"\n" % self.name) self.manager.emittedNoHandlerWarning = 1 From vsajip at users.sourceforge.net Mon Oct 24 00:36:56 2005 From: vsajip at users.sourceforge.net (vsajip@users.sourceforge.net) Date: Mon, 24 Oct 2005 00:36:56 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/logging __init__.py, 1.24.2.5, 1.24.2.6 Message-ID: <20051023223656.2B7221E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/logging In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6261 Modified Files: Tag: release24-maint __init__.py Log Message: One-off "No handlers..." error message only raised if raiseExceptions is set. 
Index: __init__.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/logging/__init__.py,v retrieving revision 1.24.2.5 retrieving revision 1.24.2.6 diff -u -d -r1.24.2.5 -r1.24.2.6 --- __init__.py 21 Oct 2005 06:00:58 -0000 1.24.2.5 +++ __init__.py 23 Oct 2005 22:36:53 -0000 1.24.2.6 @@ -1123,7 +1123,7 @@ c = None #break out else: c = c.parent - if (found == 0) and not self.manager.emittedNoHandlerWarning: + if (found == 0) and raiseExceptions and not self.manager.emittedNoHandlerWarning: sys.stderr.write("No handlers could be found for logger" " \"%s\"\n" % self.name) self.manager.emittedNoHandlerWarning = 1 From bwarsaw at users.sourceforge.net Mon Oct 24 00:39:21 2005 From: bwarsaw at users.sourceforge.net (bwarsaw@users.sourceforge.net) Date: Mon, 24 Oct 2005 00:39:21 +0200 (CEST) Subject: [Python-checkins] python/nondist/peps pep-0351.txt, NONE, 1.1 pep-0000.txt, 1.350, 1.351 Message-ID: <20051023223921.226CC1E4004@bag.python.org> Update of /cvsroot/python/python/nondist/peps In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6574 Modified Files: pep-0000.txt Added Files: pep-0351.txt Log Message: PEP 351, the freeze protocol. --- NEW FILE: pep-0351.txt --- PEP: 351 Title: The freeze protocol Version: 2.5 Author: Barry A. Warsaw Status: Active Type: Standards Track Content-Type: text/x-rst Created: 14-Apr-2005 Abstract ======== This PEP describes a simple protocol for requesting a frozen, immutable copy of a mutable object. It also defines a new built-in function which uses this protocol to provide an immutable copy on any cooperating object. Rationale ========= Built-in objects such as dictionaries and sets accept only immutable objects as keys. This means that mutable objects like lists cannot be used as keys to a dictionary. However, a Python programmer can convert a list to a tuple; the two objects are similar, but the latter is immutable, and can be used as a dictionary key. 
It is conceivable that third party objects also have similar mutable and immutable counterparts, and it would be useful to have a standard protocol for conversion of such objects. sets.Set objects expose a "protocol for automatic conversion to immutable" so that you can create sets.Sets of sets.Sets. PEP 218 deliberately dropped this feature from built-in sets. This PEP advances that the feature is still useful and proposes a standard mechanism for its support. Proposal ======== It is proposed that a new built-in function called freeze() is added. If freeze() is passed an immutable object, as determined by hash() on that object not raising a TypeError, then the object is returned directly. If freeze() is passed a mutable object (i.e. hash() of that object raises a TypeError), then freeze() will call that object's __freeze__() method to get an immutable copy. If the object does not have a __freeze__() method, then a TypeError is raised. Sample implementations ====================== Here is a Python implementation of the freeze() built-in:: def freeze(obj): try: hash(obj) return obj except TypeError: freezer = getattr(obj, '__freeze__', None) if freezer: return freezer() raise TypeError('object is not freezable') Here are some code samples which show the intended semantics:: class xset(set): def __freeze__(self): return frozenset(self) class xlist(list): def __freeze__(self): return tuple(self) class imdict(dict): def __hash__(self): return id(self) def _immutable(self, *args, **kws): raise TypeError('object is immutable') __setitem__ = _immutable __delitem__ = _immutable clear = _immutable update = _immutable setdefault = _immutable pop = _immutable popitem = _immutable class xdict(dict): def __freeze__(self): return imdict(self) >>> s = set([1, 2, 3]) >>> {s: 4} Traceback (most recent call last): File "", line 1, in ? TypeError: set objects are unhashable >>> t = freeze(s) Traceback (most recent call last): File "", line 1, in ? 
File "/usr/tmp/python-lWCjBK.py", line 9, in freeze TypeError: object is not freezable >>> t = xset(s) >>> u = freeze(t) >>> {u: 4} {frozenset([1, 2, 3]): 4} >>> x = 'hello' >>> freeze(x) is x True >>> d = xdict(a=7, b=8, c=9) >>> hash(d) Traceback (most recent call last): File "", line 1, in ? TypeError: dict objects are unhashable >>> hash(freeze(d)) -1210776116 >>> {d: 4} Traceback (most recent call last): File "", line 1, in ? TypeError: dict objects are unhashable >>> {freeze(d): 4} {{'a': 7, 'c': 9, 'b': 8}: 4} Reference implementation ======================== Patch 1335812_ provides the C implementation of this feature. It adds the freeze() built-in, along with implementations of the __freeze__() method for lists and sets. Dictionaries are not easily freezable in current Python, so an implementation of dict.__freeze__() is not provided yet. .. _1335812: http://sourceforge.net/tracker/index.php?func=detail&aid=1335812&group_id=5470&atid=305470 Open issues =========== - Should we define a similar protocol for thawing frozen objects? - Should dicts and sets automatically freeze their mutable keys? - Should we support "temporary freezing" (perhaps with a method called __congeal__()) a la __as_temporarily_immutable__() in sets.Set? - For backward compatibility with sets.Set, should we support __as_immutable__()? Or should __freeze__() just be renamed to __as_immutable__()? Copyright ========= This document has been placed in the public domain. .. 
Local Variables: mode: indented-text indent-tabs-mode: nil sentence-end-double-space: t fill-column: 70 End: Index: pep-0000.txt =================================================================== RCS file: /cvsroot/python/python/nondist/peps/pep-0000.txt,v retrieving revision 1.350 retrieving revision 1.351 diff -u -d -r1.350 -r1.351 --- pep-0000.txt 17 Oct 2005 09:15:06 -0000 1.350 +++ pep-0000.txt 23 Oct 2005 22:39:17 -0000 1.351 @@ -106,6 +106,7 @@ P 347 Migrating the Python CVS to Subversion von Löwis S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott + S 351 The freeze protocol Warsaw S 754 IEEE 754 Floating Point Special Values Warnes Finished PEPs (done, implemented in CVS) @@ -396,6 +397,7 @@ SR 348 Exception Reorganization for Python 3.0 Cannon S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott + S 351 The freeze protocol Warsaw SR 666 Reject Foolish Indentation Creighton S 754 IEEE 754 Floating Point Special Values Warnes I 3000 Python 3.0 Plans Kuchling, Cannon From nnorwitz at users.sourceforge.net Mon Oct 24 00:40:50 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 24 Oct 2005 00:40:50 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.360,2.361 Message-ID: <20051023224050.7EE311E4019@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6798/Python Modified Files: compile.c Log Message: cleanup a bit and reuse instrsize (instruction size). 
working towards fixing problems with EXTENDED_ARG Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.360 retrieving revision 2.361 diff -u -d -r2.360 -r2.361 --- compile.c 23 Oct 2005 18:52:36 -0000 2.360 +++ compile.c 23 Oct 2005 22:40:47 -0000 2.361 @@ -3696,13 +3696,11 @@ static int instrsize(struct instr *instr) { - int size = 1; - if (instr->i_hasarg) { - size += 2; - if (instr->i_oparg >> 16) - size += 2; - } - return size; + if (!instr->i_hasarg) + return 1; + if (instr->i_oparg > 0xffff) + return 6; + return 3; } static int @@ -3851,42 +3849,40 @@ static int assemble_emit(struct assembler *a, struct instr *i) { - int arg = 0, size = 0, ext = i->i_oparg >> 16; + int size, arg = 0, ext = 0; int len = PyString_GET_SIZE(a->a_bytecode); char *code; - if (!i->i_hasarg) - size = 1; - else { - if (ext) - size = 6; - else - size = 3; + size = instrsize(i); + if (i->i_hasarg) { arg = i->i_oparg; + ext = arg >> 16; } if (i->i_lineno && !assemble_lnotab(a, i)) - return 0; + return 0; if (a->a_offset + size >= len) { if (_PyString_Resize(&a->a_bytecode, len * 2) < 0) return 0; } code = PyString_AS_STRING(a->a_bytecode) + a->a_offset; a->a_offset += size; - if (ext > 0) { - *code++ = (char)EXTENDED_ARG; - *code++ = ext & 0xff; - *code++ = ext >> 8; - arg &= 0xffff; + if (size == 6) { + assert(i->i_hasarg); + *code++ = (char)EXTENDED_ARG; + *code++ = ext & 0xff; + *code++ = ext >> 8; + arg &= 0xffff; } *code++ = i->i_opcode; - if (size == 1) - return 1; - *code++ = arg & 0xff; - *code++ = arg >> 8; + if (i->i_hasarg) { + assert(size == 3 || size == 6); + *code++ = arg & 0xff; + *code++ = arg >> 8; + } return 1; } -static int +static void assemble_jump_offsets(struct assembler *a, struct compiler *c) { basicblock *b; @@ -3896,7 +3892,7 @@ /* Compute the size of each block and fixup jump args. Replace block pointer with position in bytecode. 
*/ for (i = a->a_nblocks - 1; i >= 0; i--) { - basicblock *b = a->a_postorder[i]; + b = a->a_postorder[i]; bsize = blocksize(b); b->b_offset = totsize; totsize += bsize; @@ -3918,7 +3914,6 @@ } } } - return 1; } static PyObject * @@ -4079,8 +4074,7 @@ dfs(c, entryblock, &a); /* Can't modify the bytecode after computing jump offsets. */ - if (!assemble_jump_offsets(&a, c)) - goto error; + assemble_jump_offsets(&a, c); /* Emit code in reverse postorder from dfs. */ for (i = a.a_nblocks - 1; i >= 0; i--) { From nnorwitz at users.sourceforge.net Mon Oct 24 01:00:47 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 24 Oct 2005 01:00:47 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.361,2.362 Message-ID: <20051023230047.9D7331E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9860/Python Modified Files: compile.c Log Message: Fix problem handling EXTENDED_ARGs from SF bug # 1333982 Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.361 retrieving revision 2.362 diff -u -d -r2.361 -r2.362 --- compile.c 23 Oct 2005 22:40:47 -0000 2.361 +++ compile.c 23 Oct 2005 23:00:41 -0000 2.362 @@ -3886,17 +3886,20 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c) { basicblock *b; - int bsize, totsize = 0; + int bsize, totsize, extended_arg_count, last_extended_arg_count = 0; int i; /* Compute the size of each block and fixup jump args. Replace block pointer with position in bytecode. 
*/ +start: + totsize = 0; for (i = a->a_nblocks - 1; i >= 0; i--) { b = a->a_postorder[i]; bsize = blocksize(b); b->b_offset = totsize; totsize += bsize; } + extended_arg_count = 0; for (b = c->u->u_blocks; b != NULL; b = b->b_list) { bsize = b->b_offset; for (i = 0; i < b->b_iused; i++) { @@ -3912,8 +3915,34 @@ int delta = instr->i_target->b_offset - bsize; instr->i_oparg = delta; } + else + continue; + if (instr->i_oparg > 0xffff) + extended_arg_count++; } } + + /* XXX: This is an awful hack that could hurt performance, but + on the bright side it should work until we come up + with a better solution. + + In the meantime, should the goto be dropped in favor + of a loop? + + The issue is that in the first loop blocksize() is called + which calls instrsize() which requires i_oparg be set + appropriately. There is a bootstrap problem because + i_oparg is calculated in the second loop above. + + So we loop until we stop seeing new EXTENDED_ARGs. + The only EXTENDED_ARGs that could be popping up are + ones in jump instructions. So this should converge + fairly quickly. 
+ */ + if (last_extended_arg_count != extended_arg_count) { + last_extended_arg_count = extended_arg_count; + goto start; + } } static PyObject * From nnorwitz at users.sourceforge.net Mon Oct 24 02:01:40 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 24 Oct 2005 02:01:40 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_compiler.py, 1.13, 1.14 Message-ID: <20051024000140.D23801E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22700/Lib/test Modified Files: test_compiler.py Log Message: Fix compiler test when run with -u (long mode) Index: test_compiler.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_compiler.py,v retrieving revision 1.13 retrieving revision 1.14 diff -u -d -r1.13 -r1.14 --- test_compiler.py 2 Jun 2005 05:55:19 -0000 1.13 +++ test_compiler.py 24 Oct 2005 00:01:37 -0000 1.14 @@ -28,7 +28,7 @@ f = open(path, "U") buf = f.read() f.close() - if "badsyntax" in basename: + if "badsyntax" in basename or "bad_coding" in basename: self.assertRaises(SyntaxError, compiler.compile, buf, basename, "exec") else: From nnorwitz at users.sourceforge.net Mon Oct 24 02:08:14 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 24 Oct 2005 02:08:14 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/test test_compile.py, 1.24, 1.25 Message-ID: <20051024000814.66C3C1E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24267/Lib/test Modified Files: test_compile.py Log Message: Fix problem handling EXTENDED_ARGs from SF bug # 1333982 Index: test_compile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/test_compile.py,v retrieving revision 1.24 retrieving revision 1.25 
diff -u -d -r1.24 -r1.25 --- test_compile.py 24 Oct 2004 00:10:06 -0000 1.24 +++ test_compile.py 24 Oct 2005 00:08:10 -0000 1.25 @@ -101,6 +101,29 @@ exec 'z = a' in g, d self.assertEqual(d['z'], 12) + def test_extended_arg(self): + longexpr = 'x = x or ' + '-x' * 2500 + code = ''' +def f(x): + %s + %s + %s + %s + %s + %s + %s + %s + %s + %s + # the expressions above have no effect, x == argument + while x: + x -= 1 + # EXTENDED_ARG/JUMP_ABSOLUTE here + return x +''' % ((longexpr,)*10) + exec code + self.assertEqual(f(5), 0) + def test_complex_args(self): def comp_args((a, b)): From nnorwitz at users.sourceforge.net Mon Oct 24 02:30:47 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 24 Oct 2005 02:30:47 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include symtable.h,2.16,2.17 Message-ID: <20051024003047.93E801E4004@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29439/Include Modified Files: symtable.h Log Message: Do not pollute name block_ty, prefix with _Py_ Index: symtable.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/symtable.h,v retrieving revision 2.16 retrieving revision 2.17 diff -u -d -r2.16 -r2.17 --- symtable.h 23 Oct 2005 18:37:16 -0000 2.16 +++ symtable.h 24 Oct 2005 00:30:44 -0000 2.17 @@ -5,7 +5,7 @@ #endif typedef enum _block_type { FunctionBlock, ClassBlock, ModuleBlock } - block_ty; + _Py_block_ty; struct _symtable_entry; @@ -29,7 +29,7 @@ PyObject *ste_name; /* string: name of block */ PyObject *ste_varnames; /* list of variable names */ PyObject *ste_children; /* list of child ids */ - block_ty ste_type; /* module, class, or function */ + _Py_block_ty ste_type; /* module, class, or function */ int ste_unoptimized; /* false if namespace is optimized */ int ste_nested : 1; /* true if block is nested */ int ste_free : 1; /* true if block has free 
variables */ @@ -49,7 +49,7 @@ #define PySTEntry_Check(op) ((op)->ob_type == &PySTEntry_Type) PyAPI_FUNC(PySTEntryObject *) \ - PySTEntry_New(struct symtable *, identifier, block_ty, void *, int); + PySTEntry_New(struct symtable *, identifier, _Py_block_ty, void *, int); PyAPI_FUNC(int) PyST_GetScope(PySTEntryObject *, PyObject *); PyAPI_FUNC(struct symtable *) PySymtable_Build(mod_ty, const char *, From nnorwitz at users.sourceforge.net Mon Oct 24 02:30:47 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 24 Oct 2005 02:30:47 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python symtable.c,2.17,2.18 Message-ID: <20051024003047.95E3E1E4005@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29439/Python Modified Files: symtable.c Log Message: Do not pollute name block_ty, prefix with _Py_ Index: symtable.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/symtable.c,v retrieving revision 2.17 retrieving revision 2.18 diff -u -d -r2.17 -r2.18 --- symtable.c 23 Oct 2005 18:50:36 -0000 2.17 +++ symtable.c 24 Oct 2005 00:30:44 -0000 2.18 @@ -12,7 +12,7 @@ "name '%.400s' is used prior to global declaration" PySTEntryObject * -PySTEntry_New(struct symtable *st, identifier name, block_ty block, +PySTEntry_New(struct symtable *st, identifier name, _Py_block_ty block, void *key, int lineno) { PySTEntryObject *ste = NULL; @@ -153,7 +153,7 @@ static int symtable_analyze(struct symtable *st); static int symtable_warn(struct symtable *st, char *msg); static int symtable_enter_block(struct symtable *st, identifier name, - block_ty block, void *ast, int lineno); + _Py_block_ty block, void *ast, int lineno); static int symtable_exit_block(struct symtable *st, void *ast); static int symtable_visit_stmt(struct symtable *st, stmt_ty s); static int symtable_visit_expr(struct symtable *st, expr_ty s); @@ 
-711,7 +711,7 @@ } static int -symtable_enter_block(struct symtable *st, identifier name, block_ty block, +symtable_enter_block(struct symtable *st, identifier name, _Py_block_ty block, void *ast, int lineno) { PySTEntryObject *prev = NULL; From nnorwitz at users.sourceforge.net Mon Oct 24 03:07:51 2005 From: nnorwitz at users.sourceforge.net (nnorwitz@users.sourceforge.net) Date: Mon, 24 Oct 2005 03:07:51 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Include ast.h,2.1,2.2 Message-ID: <20051024010751.5D73E1E400B@bag.python.org> Update of /cvsroot/python/python/dist/src/Include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3264/Include Modified Files: ast.h Log Message: use PyAPI_FUNC instead of DL_IMPORT. are we going to deprecate the old non-Py PREFIXED macros, etc? Index: ast.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/ast.h,v retrieving revision 2.1 retrieving revision 2.2 diff -u -d -r2.1 -r2.2 --- ast.h 20 Oct 2005 19:59:24 -0000 2.1 +++ ast.h 24 Oct 2005 01:07:47 -0000 2.2 @@ -4,8 +4,8 @@ extern "C" { #endif -extern DL_IMPORT(mod_ty) PyAST_FromNode(const node *, PyCompilerFlags *flags, - const char *); +PyAPI_FUNC(mod_ty) PyAST_FromNode(const node *, PyCompilerFlags *flags, + const char *); #ifdef __cplusplus } From lemburg at users.sourceforge.net Mon Oct 24 14:07:56 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Mon, 24 Oct 2005 14:07:56 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings cp037.py, 1.5, 1.6 cp1006.py, 1.5, 1.6 cp1026.py, 1.5, 1.6 cp1140.py, 1.2, 1.3 cp1250.py, 1.5, 1.6 cp1251.py, 1.5, 1.6 cp1252.py, 1.5, 1.6 cp1253.py, 1.5, 1.6 cp1254.py, 1.5, 1.6 cp1255.py, 1.5, 1.6 cp1256.py, 1.5, 1.6 cp1257.py, 1.5, 1.6 cp1258.py, 1.5, 1.6 cp424.py, 1.5, 1.6 cp500.py, 1.5, 1.6 cp856.py, 1.6, 1.7 cp874.py, 1.5, 1.6 cp875.py, 1.5, 1.6 iso8859_1.py, 1.5, 1.6 iso8859_10.py, 1.5, 1.6 iso8859_11.py, 1.3, 1.4 
iso8859_13.py, 1.5, 1.6 iso8859_14.py, 1.5, 1.6 iso8859_15.py, 1.5, 1.6 iso8859_16.py, 1.3, 1.4 iso8859_2.py, 1.5, 1.6 iso8859_3.py, 1.5, 1.6 iso8859_4.py, 1.5, 1.6 iso8859_5.py, 1.5, 1.6 iso8859_6.py, 1.5, 1.6 iso8859_7.py, 1.5, 1.6 iso8859_8.py, 1.5, 1.6 iso8859_9.py, 1.5, 1.6 koi8_r.py, 1.5, 1.6 koi8_u.py, 1.2, 1.3 mac_centeuro.py, 1.1, 1.2 mac_croatian.py, 1.1, 1.2 mac_cyrillic.py, 1.5, 1.6 mac_farsi.py, 1.1, 1.2 mac_greek.py, 1.5, 1.6 mac_iceland.py, 1.5, 1.6 mac_roman.py, 1.5, 1.6 mac_romanian.py, 1.1, 1.2 mac_turkish.py, 1.5, 1.6 tis_620.py, 1.2, 1.3 Message-ID: <20051024120756.8CA3E1E4045@bag.python.org> Update of /cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31511 Modified Files: cp037.py cp1006.py cp1026.py cp1140.py cp1250.py cp1251.py cp1252.py cp1253.py cp1254.py cp1255.py cp1256.py cp1257.py cp1258.py cp424.py cp500.py cp856.py cp874.py cp875.py iso8859_1.py iso8859_10.py iso8859_11.py iso8859_13.py iso8859_14.py iso8859_15.py iso8859_16.py iso8859_2.py iso8859_3.py iso8859_4.py iso8859_5.py iso8859_6.py iso8859_7.py iso8859_8.py iso8859_9.py koi8_r.py koi8_u.py mac_centeuro.py mac_croatian.py mac_cyrillic.py mac_farsi.py mac_greek.py mac_iceland.py mac_roman.py mac_romanian.py mac_turkish.py tis_620.py Log Message: Removed the decoding_map from the codecs where this is possible. Replaced the tis_620, cp1140 and koi8_u codecs with new ones based on custom mapping files. Index: cp037.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp037.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp037.py 21 Oct 2005 14:35:35 -0000 1.5 +++ cp037.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py. 
+""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py. """#" @@ -28,766 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1254 lines suppressed...] + 0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0xe1, # DIVISION SIGN + 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS } \ No newline at end of file Index: cp1006.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1006.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1006.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1006.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP1006.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py. """#" @@ -28,623 +28,524 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1109 lines suppressed...] 
+ 0xfed0: 0xde, # ARABIC LETTER GHAIN MEDIAL FORM + 0xfed1: 0xdf, # ARABIC LETTER FEH ISOLATED FORM + 0xfed3: 0xe0, # ARABIC LETTER FEH INITIAL FORM + 0xfed5: 0xe1, # ARABIC LETTER QAF ISOLATED FORM + 0xfed7: 0xe2, # ARABIC LETTER QAF INITIAL FORM + 0xfed9: 0xe3, # ARABIC LETTER KAF ISOLATED FORM + 0xfedb: 0xe4, # ARABIC LETTER KAF INITIAL FORM + 0xfedd: 0xe7, # ARABIC LETTER LAM ISOLATED FORM + 0xfedf: 0xe8, # ARABIC LETTER LAM INITIAL FORM + 0xfee0: 0xe9, # ARABIC LETTER LAM MEDIAL FORM + 0xfee1: 0xea, # ARABIC LETTER MEEM ISOLATED FORM + 0xfee3: 0xeb, # ARABIC LETTER MEEM INITIAL FORM + 0xfee5: 0xed, # ARABIC LETTER NOON ISOLATED FORM + 0xfee7: 0xee, # ARABIC LETTER NOON INITIAL FORM + 0xfeed: 0xf0, # ARABIC LETTER WAW ISOLATED FORM + 0xfef1: 0xf9, # ARABIC LETTER YEH ISOLATED FORM + 0xfef2: 0xfa, # ARABIC LETTER YEH FINAL FORM + 0xfef3: 0xfb, # ARABIC LETTER YEH INITIAL FORM } \ No newline at end of file Index: cp1026.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1026.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1026.py 21 Oct 2005 14:35:35 -0000 1.5 +++ cp1026.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py. """#" @@ -28,766 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1254 lines suppressed...] 
+ 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0xa1, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0xe1, # DIVISION SIGN + 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0xe0, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x5a, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0xd0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x5b, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x79, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x7c, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x6a, # LATIN SMALL LETTER S WITH CEDILLA } \ No newline at end of file Index: cp1140.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1140.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- cp1140.py 8 Aug 2002 20:19:18 -0000 1.2 +++ cp1140.py 24 Oct 2005 12:07:48 -0000 1.3 @@ -1,11 +1,8 @@ -""" Python Character Mapping Codec for cp1140 +""" Python Character Mapping Codec generated from 'python-mappings/CP1140.TXT' with gencodec.py. -Written by Brian Quinlan(brian at sweetapp.com). NO WARRANTY. 
-""" +"""#" import codecs -import copy -import cp037 ### Codec APIs @@ -17,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -31,14 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map -decoding_map = copy.copy(cp037.decoding_map) +### Decoding Table -decoding_map.update({ - 0x009f: 0x20ac # EURO SIGN -}) +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0a -> CONTROL + u'\x0b' # 0x0b -> VERTICAL TABULATION + u'\x0c' # 0x0c -> FORM FEED + u'\r' # 0x0d -> CARRIAGE RETURN + u'\x0e' # 0x0e -> SHIFT OUT + u'\x0f' # 0x0f -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1a -> CONTROL + u'\x8f' # 0x1b -> CONTROL + u'\x1c' # 0x1c -> FILE SEPARATOR + u'\x1d' # 0x1d -> GROUP SEPARATOR + u'\x1e' # 0x1e -> RECORD SEPARATOR + u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2a -> CONTROL + u'\x8b' # 0x2b -> CONTROL + u'\x8c' # 0x2c -> CONTROL + u'\x05' # 0x2d -> ENQUIRY + u'\x06' # 0x2e 
-> ACKNOWLEDGE + u'\x07' # 0x2f -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3a -> CONTROL + u'\x9b' # 0x3b -> CONTROL + u'\x14' # 0x3c -> DEVICE CONTROL FOUR + u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3e -> CONTROL + u'\x1a' # 0x3f -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'\xa2' # 0x4a -> CENT SIGN + u'.' # 0x4b -> FULL STOP + u'<' # 0x4c -> LESS-THAN SIGN + u'(' # 0x4d -> LEFT PARENTHESIS + u'+' # 0x4e -> PLUS SIGN + u'|' # 0x4f -> VERTICAL LINE + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'!' 
# 0x5a -> EXCLAMATION MARK + u'$' # 0x5b -> DOLLAR SIGN + u'*' # 0x5c -> ASTERISK + u')' # 0x5d -> RIGHT PARENTHESIS + u';' # 0x5e -> SEMICOLON + u'\xac' # 0x5f -> NOT SIGN + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xa6' # 0x6a -> BROKEN BAR + u',' # 0x6b -> COMMA + u'%' # 0x6c -> PERCENT SIGN + u'_' # 0x6d -> LOW LINE + u'>' # 0x6e -> GREATER-THAN SIGN + u'?' # 0x6f -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7a -> COLON + u'#' # 0x7b -> NUMBER SIGN + u'@' # 0x7c -> COMMERCIAL AT + u"'" # 0x7d -> APOSTROPHE + u'=' # 0x7e -> EQUALS SIGN + u'"' # 0x7f -> QUOTATION MARK + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8a -> LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK + u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8c -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8d -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8e -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8f -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9a -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9b -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9c -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9d -> CEDILLA + u'\xc6' # 0x9e -> LATIN CAPITAL LIGATURE AE + u'\u20ac' # 0x9f -> EURO SIGN + u'\xb5' # 0xa0 -> MICRO SIGN + u'~' # 0xa1 -> TILDE + u's' # 0xa2 -> LATIN SMALL LETTER S + u't' # 0xa3 -> LATIN SMALL LETTER T + u'u' # 0xa4 -> LATIN SMALL LETTER U + u'v' # 0xa5 -> LATIN SMALL LETTER V + u'w' # 0xa6 -> LATIN SMALL LETTER W + u'x' # 0xa7 -> LATIN SMALL LETTER X + u'y' # 0xa8 -> LATIN SMALL LETTER Y + u'z' # 0xa9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xaa -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xab -> INVERTED QUESTION MARK + u'\xd0' # 0xac -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xad -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xae -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xaf -> REGISTERED SIGN + u'^' # 0xb0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xb1 -> POUND SIGN + u'\xa5' # 0xb2 -> YEN SIGN + u'\xb7' # 0xb3 -> MIDDLE DOT + u'\xa9' # 0xb4 -> COPYRIGHT SIGN + u'\xa7' # 0xb5 -> SECTION SIGN + u'\xb6' # 0xb6 -> PILCROW SIGN + u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xba -> LEFT SQUARE BRACKET + u']' # 0xbb -> RIGHT SQUARE 
BRACKET + u'\xaf' # 0xbc -> MACRON + u'\xa8' # 0xbd -> DIAERESIS + u'\xb4' # 0xbe -> ACUTE ACCENT + u'\xd7' # 0xbf -> MULTIPLICATION SIGN + u'{' # 0xc0 -> LEFT CURLY BRACKET + u'A' # 0xc1 -> LATIN CAPITAL LETTER A + u'B' # 0xc2 -> LATIN CAPITAL LETTER B + u'C' # 0xc3 -> LATIN CAPITAL LETTER C + u'D' # 0xc4 -> LATIN CAPITAL LETTER D + u'E' # 0xc5 -> LATIN CAPITAL LETTER E + u'F' # 0xc6 -> LATIN CAPITAL LETTER F + u'G' # 0xc7 -> LATIN CAPITAL LETTER G + u'H' # 0xc8 -> LATIN CAPITAL LETTER H + u'I' # 0xc9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xca -> SOFT HYPHEN + u'\xf4' # 0xcb -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xcc -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xcd -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xce -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xcf -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xd0 -> RIGHT CURLY BRACKET + u'J' # 0xd1 -> LATIN CAPITAL LETTER J + u'K' # 0xd2 -> LATIN CAPITAL LETTER K + u'L' # 0xd3 -> LATIN CAPITAL LETTER L + u'M' # 0xd4 -> LATIN CAPITAL LETTER M + u'N' # 0xd5 -> LATIN CAPITAL LETTER N + u'O' # 0xd6 -> LATIN CAPITAL LETTER O + u'P' # 0xd7 -> LATIN CAPITAL LETTER P + u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q + u'R' # 0xd9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xda -> SUPERSCRIPT ONE + u'\xfb' # 0xdb -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xdc -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xdd -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xde -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xdf -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xe0 -> REVERSE SOLIDUS + u'\xf7' # 0xe1 -> DIVISION SIGN + u'S' # 0xe2 -> LATIN CAPITAL LETTER S + u'T' # 0xe3 -> LATIN CAPITAL LETTER T + u'U' # 0xe4 -> LATIN CAPITAL LETTER U + u'V' # 0xe5 -> LATIN CAPITAL LETTER V + u'W' # 0xe6 -> LATIN CAPITAL LETTER W + u'X' # 0xe7 -> LATIN CAPITAL LETTER X + u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xea -> SUPERSCRIPT TWO + u'\xd4' # 0xeb -> LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xec -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xed -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xef -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xf0 -> DIGIT ZERO + u'1' # 0xf1 -> DIGIT ONE + u'2' # 0xf2 -> DIGIT TWO + u'3' # 0xf3 -> DIGIT THREE + u'4' # 0xf4 -> DIGIT FOUR + u'5' # 0xf5 -> DIGIT FIVE + u'6' # 0xf6 -> DIGIT SIX + u'7' # 0xf7 -> DIGIT SEVEN + u'8' # 0xf8 -> DIGIT EIGHT + u'9' # 0xf9 -> DIGIT NINE + u'\xb3' # 0xfa -> SUPERSCRIPT THREE + u'\xdb' # 0xfb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xfc -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xfd -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xfe -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xff -> CONTROL +) ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x37, # END OF TRANSMISSION + 0x0005: 0x2d, # ENQUIRY + 0x0006: 0x2e, # ACKNOWLEDGE + 0x0007: 0x2f, # BELL + 0x0008: 0x16, # BACKSPACE + 0x0009: 0x05, # HORIZONTAL TABULATION + 0x000a: 0x25, # LINE FEED + 0x000b: 0x0b, # VERTICAL TABULATION + 0x000c: 0x0c, # FORM FEED + 0x000d: 0x0d, # CARRIAGE RETURN + 0x000e: 0x0e, # SHIFT OUT + 0x000f: 0x0f, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x3c, # DEVICE CONTROL FOUR + 0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x32, # SYNCHRONOUS IDLE + 0x0017: 0x26, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001a: 0x3f, # SUBSTITUTE + 0x001b: 0x27, # ESCAPE + 0x001c: 0x1c, # FILE SEPARATOR + 0x001d: 0x1d, # GROUP SEPARATOR + 0x001e: 0x1e, # RECORD SEPARATOR + 0x001f: 0x1f, # UNIT SEPARATOR + 0x0020: 0x40, # SPACE + 0x0021: 0x5a, # 
EXCLAMATION MARK + 0x0022: 0x7f, # QUOTATION MARK + 0x0023: 0x7b, # NUMBER SIGN + 0x0024: 0x5b, # DOLLAR SIGN + 0x0025: 0x6c, # PERCENT SIGN + 0x0026: 0x50, # AMPERSAND + 0x0027: 0x7d, # APOSTROPHE + 0x0028: 0x4d, # LEFT PARENTHESIS + 0x0029: 0x5d, # RIGHT PARENTHESIS + 0x002a: 0x5c, # ASTERISK + 0x002b: 0x4e, # PLUS SIGN + 0x002c: 0x6b, # COMMA + 0x002d: 0x60, # HYPHEN-MINUS + 0x002e: 0x4b, # FULL STOP + 0x002f: 0x61, # SOLIDUS + 0x0030: 0xf0, # DIGIT ZERO + 0x0031: 0xf1, # DIGIT ONE + 0x0032: 0xf2, # DIGIT TWO + 0x0033: 0xf3, # DIGIT THREE + 0x0034: 0xf4, # DIGIT FOUR + 0x0035: 0xf5, # DIGIT FIVE + 0x0036: 0xf6, # DIGIT SIX + 0x0037: 0xf7, # DIGIT SEVEN + 0x0038: 0xf8, # DIGIT EIGHT + 0x0039: 0xf9, # DIGIT NINE + 0x003a: 0x7a, # COLON + 0x003b: 0x5e, # SEMICOLON + 0x003c: 0x4c, # LESS-THAN SIGN + 0x003d: 0x7e, # EQUALS SIGN + 0x003e: 0x6e, # GREATER-THAN SIGN + 0x003f: 0x6f, # QUESTION MARK + 0x0040: 0x7c, # COMMERCIAL AT + 0x0041: 0xc1, # LATIN CAPITAL LETTER A + 0x0042: 0xc2, # LATIN CAPITAL LETTER B + 0x0043: 0xc3, # LATIN CAPITAL LETTER C + 0x0044: 0xc4, # LATIN CAPITAL LETTER D + 0x0045: 0xc5, # LATIN CAPITAL LETTER E + 0x0046: 0xc6, # LATIN CAPITAL LETTER F + 0x0047: 0xc7, # LATIN CAPITAL LETTER G + 0x0048: 0xc8, # LATIN CAPITAL LETTER H + 0x0049: 0xc9, # LATIN CAPITAL LETTER I + 0x004a: 0xd1, # LATIN CAPITAL LETTER J + 0x004b: 0xd2, # LATIN CAPITAL LETTER K + 0x004c: 0xd3, # LATIN CAPITAL LETTER L + 0x004d: 0xd4, # LATIN CAPITAL LETTER M + 0x004e: 0xd5, # LATIN CAPITAL LETTER N + 0x004f: 0xd6, # LATIN CAPITAL LETTER O + 0x0050: 0xd7, # LATIN CAPITAL LETTER P + 0x0051: 0xd8, # LATIN CAPITAL LETTER Q + 0x0052: 0xd9, # LATIN CAPITAL LETTER R + 0x0053: 0xe2, # LATIN CAPITAL LETTER S + 0x0054: 0xe3, # LATIN CAPITAL LETTER T + 0x0055: 0xe4, # LATIN CAPITAL LETTER U + 0x0056: 0xe5, # LATIN CAPITAL LETTER V + 0x0057: 0xe6, # LATIN CAPITAL LETTER W + 0x0058: 0xe7, # LATIN CAPITAL LETTER X + 0x0059: 0xe8, # LATIN CAPITAL LETTER Y + 0x005a: 0xe9, # LATIN CAPITAL 
LETTER Z + 0x005b: 0xba, # LEFT SQUARE BRACKET + 0x005c: 0xe0, # REVERSE SOLIDUS + 0x005d: 0xbb, # RIGHT SQUARE BRACKET + 0x005e: 0xb0, # CIRCUMFLEX ACCENT + 0x005f: 0x6d, # LOW LINE + 0x0060: 0x79, # GRAVE ACCENT + 0x0061: 0x81, # LATIN SMALL LETTER A + 0x0062: 0x82, # LATIN SMALL LETTER B + 0x0063: 0x83, # LATIN SMALL LETTER C + 0x0064: 0x84, # LATIN SMALL LETTER D + 0x0065: 0x85, # LATIN SMALL LETTER E + 0x0066: 0x86, # LATIN SMALL LETTER F + 0x0067: 0x87, # LATIN SMALL LETTER G + 0x0068: 0x88, # LATIN SMALL LETTER H + 0x0069: 0x89, # LATIN SMALL LETTER I + 0x006a: 0x91, # LATIN SMALL LETTER J + 0x006b: 0x92, # LATIN SMALL LETTER K + 0x006c: 0x93, # LATIN SMALL LETTER L + 0x006d: 0x94, # LATIN SMALL LETTER M + 0x006e: 0x95, # LATIN SMALL LETTER N + 0x006f: 0x96, # LATIN SMALL LETTER O + 0x0070: 0x97, # LATIN SMALL LETTER P + 0x0071: 0x98, # LATIN SMALL LETTER Q + 0x0072: 0x99, # LATIN SMALL LETTER R + 0x0073: 0xa2, # LATIN SMALL LETTER S + 0x0074: 0xa3, # LATIN SMALL LETTER T + 0x0075: 0xa4, # LATIN SMALL LETTER U + 0x0076: 0xa5, # LATIN SMALL LETTER V + 0x0077: 0xa6, # LATIN SMALL LETTER W + 0x0078: 0xa7, # LATIN SMALL LETTER X + 0x0079: 0xa8, # LATIN SMALL LETTER Y + 0x007a: 0xa9, # LATIN SMALL LETTER Z + 0x007b: 0xc0, # LEFT CURLY BRACKET + 0x007c: 0x4f, # VERTICAL LINE + 0x007d: 0xd0, # RIGHT CURLY BRACKET + 0x007e: 0xa1, # TILDE + 0x007f: 0x07, # DELETE + 0x0080: 0x20, # CONTROL + 0x0081: 0x21, # CONTROL + 0x0082: 0x22, # CONTROL + 0x0083: 0x23, # CONTROL + 0x0084: 0x24, # CONTROL + 0x0085: 0x15, # CONTROL + 0x0086: 0x06, # CONTROL + 0x0087: 0x17, # CONTROL + 0x0088: 0x28, # CONTROL + 0x0089: 0x29, # CONTROL + 0x008a: 0x2a, # CONTROL + 0x008b: 0x2b, # CONTROL + 0x008c: 0x2c, # CONTROL + 0x008d: 0x09, # CONTROL + 0x008e: 0x0a, # CONTROL + 0x008f: 0x1b, # CONTROL + 0x0090: 0x30, # CONTROL + 0x0091: 0x31, # CONTROL + 0x0092: 0x1a, # CONTROL + 0x0093: 0x33, # CONTROL + 0x0094: 0x34, # CONTROL + 0x0095: 0x35, # CONTROL + 0x0096: 0x36, # CONTROL + 0x0097: 0x08, # 
CONTROL + 0x0098: 0x38, # CONTROL + 0x0099: 0x39, # CONTROL + 0x009a: 0x3a, # CONTROL + 0x009b: 0x3b, # CONTROL + 0x009c: 0x04, # CONTROL + 0x009d: 0x14, # CONTROL + 0x009e: 0x3e, # CONTROL + 0x009f: 0xff, # CONTROL + 0x00a0: 0x41, # NO-BREAK SPACE + 0x00a1: 0xaa, # INVERTED EXCLAMATION MARK + 0x00a2: 0x4a, # CENT SIGN + 0x00a3: 0xb1, # POUND SIGN + 0x00a5: 0xb2, # YEN SIGN + 0x00a6: 0x6a, # BROKEN BAR + 0x00a7: 0xb5, # SECTION SIGN + 0x00a8: 0xbd, # DIAERESIS + 0x00a9: 0xb4, # COPYRIGHT SIGN + 0x00aa: 0x9a, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x5f, # NOT SIGN + 0x00ad: 0xca, # SOFT HYPHEN + 0x00ae: 0xaf, # REGISTERED SIGN + 0x00af: 0xbc, # MACRON + 0x00b0: 0x90, # DEGREE SIGN + 0x00b1: 0x8f, # PLUS-MINUS SIGN + 0x00b2: 0xea, # SUPERSCRIPT TWO + 0x00b3: 0xfa, # SUPERSCRIPT THREE + 0x00b4: 0xbe, # ACUTE ACCENT + 0x00b5: 0xa0, # MICRO SIGN + 0x00b6: 0xb6, # PILCROW SIGN + 0x00b7: 0xb3, # MIDDLE DOT + 0x00b8: 0x9d, # CEDILLA + 0x00b9: 0xda, # SUPERSCRIPT ONE + 0x00ba: 0x9b, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0xb8, # VULGAR FRACTION ONE HALF + 0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0xab, # INVERTED QUESTION MARK + 0x00c0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x9e, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x78, # LATIN CAPITAL LETTER 
I WITH GRAVE + 0x00cd: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0xac, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00d1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0xed, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0xeb, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0xef, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0xec, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0xbf, # MULTIPLICATION SIGN + 0x00d8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0xfd, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0xfe, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0xfb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0xfc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0xad, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0xae, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00df: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x9c, # LATIN SMALL LIGATURE AE + 0x00e7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH 
GRAVE + 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0xe1, # DIVISION SIGN + 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x20ac: 0x9f, # EURO SIGN +} \ No newline at end of file Index: cp1250.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1250.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1250.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1250.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. """#" @@ -28,604 +28,520 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1087 lines suppressed...] 
+ 0x02dd: 0xbd, # DOUBLE ACUTE ACCENT + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1251.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1251.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1251.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1251.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. """#" @@ -28,642 +28,524 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1129 lines suppressed...] 
+ 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x88, # EURO SIGN + 0x2116: 0xb9, # NUMERO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1252.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1252.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1252.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1252.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py. """#" @@ -28,557 +28,520 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1040 lines suppressed...] 
+ 0x02dc: 0x98, # SMALL TILDE + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1253.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1253.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1253.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1253.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py. """#" @@ -28,620 +28,508 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1091 lines suppressed...] 
+ 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2015: 0xaf, # HORIZONTAL BAR + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1254.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1254.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1254.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1254.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py. """#" @@ -28,561 +28,518 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1042 lines suppressed...] 
+ 0x02dc: 0x98, # SMALL TILDE + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1255.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1255.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1255.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1255.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. """#" @@ -28,606 +28,502 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1071 lines suppressed...] 
+ 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20aa: 0xa4, # NEW SHEQEL SIGN + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1256.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1256.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1256.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1256.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. """#" @@ -28,615 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1103 lines suppressed...] 
+ 0x200f: 0xfe, # RIGHT-TO-LEFT MARK + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1257.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1257.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1257.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1257.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py. """#" @@ -28,605 +28,513 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1081 lines suppressed...] 
+ 0x02db: 0x9e, # OGONEK + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp1258.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1258.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp1258.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp1258.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. """#" @@ -28,567 +28,516 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1046 lines suppressed...] 
+ 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0x86, # DAGGER + 0x2021: 0x87, # DOUBLE DAGGER + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x2030: 0x89, # PER MILLE SIGN + 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20ab: 0xfe, # DONG SIGN + 0x20ac: 0x80, # EURO SIGN + 0x2122: 0x99, # TRADE MARK SIGN } \ No newline at end of file Index: cp424.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp424.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp424.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp424.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP424.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py. """#" @@ -28,728 +28,487 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1178 lines suppressed...] 
+ 0x05da: 0x52, # HEBREW LETTER FINAL KAF + 0x05db: 0x53, # HEBREW LETTER KAF + 0x05dc: 0x54, # HEBREW LETTER LAMED + 0x05dd: 0x55, # HEBREW LETTER FINAL MEM + 0x05de: 0x56, # HEBREW LETTER MEM + 0x05df: 0x57, # HEBREW LETTER FINAL NUN + 0x05e0: 0x58, # HEBREW LETTER NUN + 0x05e1: 0x59, # HEBREW LETTER SAMEKH + 0x05e2: 0x62, # HEBREW LETTER AYIN + 0x05e3: 0x63, # HEBREW LETTER FINAL PE + 0x05e4: 0x64, # HEBREW LETTER PE + 0x05e5: 0x65, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x66, # HEBREW LETTER TSADI + 0x05e7: 0x67, # HEBREW LETTER QOF + 0x05e8: 0x68, # HEBREW LETTER RESH + 0x05e9: 0x69, # HEBREW LETTER SHIN + 0x05ea: 0x71, # HEBREW LETTER TAV + 0x2017: 0x78, # DOUBLE LOW LINE } \ No newline at end of file Index: cp500.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp500.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp500.py 21 Oct 2005 14:35:35 -0000 1.5 +++ cp500.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py. """#" @@ -28,766 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1254 lines suppressed...] 
+ 0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0xe1, # DIVISION SIGN + 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS } \ No newline at end of file Index: cp856.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp856.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp856.py 21 Oct 2005 13:49:12 -0000 1.6 +++ cp856.py 24 Oct 2005 12:07:48 -0000 1.7 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MISC/CP856.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MISC/CP856.TXT' with gencodec.py. """#" @@ -28,617 +28,484 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1064 lines suppressed...] 
+ 0x2550: 0xcd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0xc9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0xbb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0xc8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0xbc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0xcc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0xb9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0xcb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0xca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0xce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0xdf, # UPPER HALF BLOCK + 0x2584: 0xdc, # LOWER HALF BLOCK + 0x2588: 0xdb, # FULL BLOCK + 0x2591: 0xb0, # LIGHT SHADE + 0x2592: 0xb1, # MEDIUM SHADE + 0x2593: 0xb2, # DARK SHADE + 0x25a0: 0xfe, # BLACK SQUARE } \ No newline at end of file Index: cp874.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp874.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp874.py 21 Oct 2005 13:49:12 -0000 1.5 +++ cp874.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. """#" @@ -28,626 +28,494 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1083 lines suppressed...] 
+ 0x0e53: 0xf3, # THAI DIGIT THREE + 0x0e54: 0xf4, # THAI DIGIT FOUR + 0x0e55: 0xf5, # THAI DIGIT FIVE + 0x0e56: 0xf6, # THAI DIGIT SIX + 0x0e57: 0xf7, # THAI DIGIT SEVEN + 0x0e58: 0xf8, # THAI DIGIT EIGHT + 0x0e59: 0xf9, # THAI DIGIT NINE + 0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU + 0x0e5b: 0xfb, # THAI CHARACTER KHOMUT + 0x2013: 0x96, # EN DASH + 0x2014: 0x97, # EM DASH + 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x2022: 0x95, # BULLET + 0x2026: 0x85, # HORIZONTAL ELLIPSIS + 0x20ac: 0x80, # EURO SIGN } \ No newline at end of file Index: cp875.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp875.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- cp875.py 21 Oct 2005 14:35:35 -0000 1.5 +++ cp875.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py. """#" @@ -28,761 +28,519 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1242 lines suppressed...] 
+ 0x03c0: 0xad, # GREEK SMALL LETTER PI + 0x03c1: 0xae, # GREEK SMALL LETTER RHO + 0x03c2: 0xba, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0xaf, # GREEK SMALL LETTER SIGMA + 0x03c4: 0xbb, # GREEK SMALL LETTER TAU + 0x03c5: 0xbc, # GREEK SMALL LETTER UPSILON + 0x03c6: 0xbd, # GREEK SMALL LETTER PHI + 0x03c7: 0xbe, # GREEK SMALL LETTER CHI + 0x03c8: 0xbf, # GREEK SMALL LETTER PSI + 0x03c9: 0xcb, # GREEK SMALL LETTER OMEGA + 0x03ca: 0xb4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0xb8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0xb6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0xb7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0xb9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0xcf, # HORIZONTAL BAR + 0x2018: 0xce, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xde, # RIGHT SINGLE QUOTATION MARK } \ No newline at end of file Index: iso8859_1.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_1.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_1.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_1.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-1.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py. """#" @@ -28,530 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1018 lines suppressed...] 
+ 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0xf0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0xf7, # DIVISION SIGN + 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0xfe, # LATIN SMALL LETTER THORN (Icelandic) + 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS } \ No newline at end of file Index: iso8859_10.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_10.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_10.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_10.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-10.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-10.TXT' with gencodec.py. """#" @@ -28,576 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1064 lines suppressed...] 
+ 0x0146: 0xf1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014a: 0xaf, # LATIN CAPITAL LETTER ENG + 0x014b: 0xbf, # LATIN SMALL LETTER ENG + 0x014c: 0xd2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0xf2, # LATIN SMALL LETTER O WITH MACRON + 0x0160: 0xaa, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xba, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0xab, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0xbb, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0xd7, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0xf7, # LATIN SMALL LETTER U WITH TILDE + 0x016a: 0xae, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0xbe, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xd9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xf9, # LATIN SMALL LETTER U WITH OGONEK + 0x017d: 0xac, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0xbc, # LATIN SMALL LETTER Z WITH CARON + 0x2015: 0xbd, # HORIZONTAL BAR } \ No newline at end of file Index: iso8859_11.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_11.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- iso8859_11.py 21 Oct 2005 13:49:12 -0000 1.3 +++ iso8859_11.py 24 Oct 2005 12:07:48 -0000 1.4 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-11.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-11.TXT' with gencodec.py. """#" @@ -28,362 +28,262 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1087 lines suppressed...] 
+ 0x0e4a: 0xea, # THAI CHARACTER MAI TRI + 0x0e4b: 0xeb, # THAI CHARACTER MAI CHATTAWA + 0x0e4c: 0xec, # THAI CHARACTER THANTHAKHAT + 0x0e4d: 0xed, # THAI CHARACTER NIKHAHIT + 0x0e4e: 0xee, # THAI CHARACTER YAMAKKAN + 0x0e4f: 0xef, # THAI CHARACTER FONGMAN + 0x0e50: 0xf0, # THAI DIGIT ZERO + 0x0e51: 0xf1, # THAI DIGIT ONE + 0x0e52: 0xf2, # THAI DIGIT TWO + 0x0e53: 0xf3, # THAI DIGIT THREE + 0x0e54: 0xf4, # THAI DIGIT FOUR + 0x0e55: 0xf5, # THAI DIGIT FIVE + 0x0e56: 0xf6, # THAI DIGIT SIX + 0x0e57: 0xf7, # THAI DIGIT SEVEN + 0x0e58: 0xf8, # THAI DIGIT EIGHT + 0x0e59: 0xf9, # THAI DIGIT NINE + 0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU + 0x0e5b: 0xfb, # THAI CHARACTER KHOMUT } \ No newline at end of file Index: iso8859_13.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_13.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_13.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_13.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-13.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-13.TXT' with gencodec.py. """#" @@ -28,586 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1074 lines suppressed...] 
+ 0x015a: 0xda, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0xfa, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xd0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xf0, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0xdb, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0xfb, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xd8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xf8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0xca, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0xea, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0xdd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0xfd, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0xde, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0xfe, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0xff, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0xb4, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0xa1, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0xa5, # DOUBLE LOW-9 QUOTATION MARK } \ No newline at end of file Index: iso8859_14.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_14.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_14.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_14.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-14.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-14.TXT' with gencodec.py. """#" @@ -28,561 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1049 lines suppressed...] 
+ 0x1e1e: 0xb0, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x1e1f: 0xb1, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x1e40: 0xb4, # LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x1e41: 0xb5, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x1e56: 0xb7, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x1e57: 0xb9, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x1e60: 0xbb, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x1e61: 0xbf, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x1e6a: 0xd7, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x1e6b: 0xf7, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x1e80: 0xa8, # LATIN CAPITAL LETTER W WITH GRAVE + 0x1e81: 0xb8, # LATIN SMALL LETTER W WITH GRAVE + 0x1e82: 0xaa, # LATIN CAPITAL LETTER W WITH ACUTE + 0x1e83: 0xba, # LATIN SMALL LETTER W WITH ACUTE + 0x1e84: 0xbd, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x1e85: 0xbe, # LATIN SMALL LETTER W WITH DIAERESIS + 0x1ef2: 0xac, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x1ef3: 0xbc, # LATIN SMALL LETTER Y WITH GRAVE } \ No newline at end of file Index: iso8859_15.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_15.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_15.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_15.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-15.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-15.TXT' with gencodec.py. """#" @@ -28,538 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1026 lines suppressed...] 
+ 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0xf7, # DIVISION SIGN + 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0xfe, # LATIN SMALL LETTER THORN + 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0xbc, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xbd, # LATIN SMALL LIGATURE OE + 0x0160: 0xa6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xa8, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0xbe, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x017d: 0xb4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0xb8, # LATIN SMALL LETTER Z WITH CARON + 0x20ac: 0xa4, # EURO SIGN } \ No newline at end of file Index: iso8859_16.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_16.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- iso8859_16.py 21 Oct 2005 13:49:12 -0000 1.3 +++ iso8859_16.py 24 Oct 2005 12:07:48 -0000 1.4 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-16.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-16.TXT' with gencodec.py. """#" @@ -28,570 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1058 lines suppressed...] 
+ 0x0160: 0xa6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xa8, # LATIN SMALL LETTER S WITH CARON + 0x0170: 0xd8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xf8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0178: 0xbe, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0179: 0xac, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0xae, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0xaf, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0xbf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0xb4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0xb8, # LATIN SMALL LETTER Z WITH CARON + 0x0218: 0xaa, # LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x0219: 0xba, # LATIN SMALL LETTER S WITH COMMA BELOW + 0x021a: 0xde, # LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x021b: 0xfe, # LATIN SMALL LETTER T WITH COMMA BELOW + 0x201d: 0xb5, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0xa5, # DOUBLE LOW-9 QUOTATION MARK + 0x20ac: 0xa4, # EURO SIGN } \ No newline at end of file Index: iso8859_2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_2.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_2.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_2.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-2.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-2.TXT' with gencodec.py. """#" @@ -28,587 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1075 lines suppressed...] 
+ 0x0163: 0xfe, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0xab, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0xbb, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0xd9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0xf9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0xdb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xfb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0xac, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0xbc, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0xaf, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0xbf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0xae, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0xbe, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0xb7, # CARON + 0x02d8: 0xa2, # BREVE + 0x02d9: 0xff, # DOT ABOVE + 0x02db: 0xb2, # OGONEK + 0x02dd: 0xbd, # DOUBLE ACUTE ACCENT } \ No newline at end of file Index: iso8859_3.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_3.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_3.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_3.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-3.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-3.TXT' with gencodec.py. """#" @@ -28,558 +28,518 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1032 lines suppressed...] 
+ 0x0124: 0xa6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x0125: 0xb6, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x0126: 0xa1, # LATIN CAPITAL LETTER H WITH STROKE + 0x0127: 0xb1, # LATIN SMALL LETTER H WITH STROKE + 0x0130: 0xa9, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xb9, # LATIN SMALL LETTER DOTLESS I + 0x0134: 0xac, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x0135: 0xbc, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x015c: 0xde, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x015d: 0xfe, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x015e: 0xaa, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0xba, # LATIN SMALL LETTER S WITH CEDILLA + 0x016c: 0xdd, # LATIN CAPITAL LETTER U WITH BREVE + 0x016d: 0xfd, # LATIN SMALL LETTER U WITH BREVE + 0x017b: 0xaf, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0xbf, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x02d8: 0xa2, # BREVE + 0x02d9: 0xff, # DOT ABOVE } \ No newline at end of file Index: iso8859_4.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_4.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_4.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_4.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-4.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-4.TXT' with gencodec.py. """#" @@ -28,580 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1068 lines suppressed...] 
+ 0x014d: 0xf2, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0xa3, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xb3, # LATIN SMALL LETTER R WITH CEDILLA + 0x0160: 0xa9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xb9, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0xac, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0xbc, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0xdd, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0xfd, # LATIN SMALL LETTER U WITH TILDE + 0x016a: 0xde, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0xfe, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xd9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xf9, # LATIN SMALL LETTER U WITH OGONEK + 0x017d: 0xae, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0xbe, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0xb7, # CARON + 0x02d9: 0xff, # DOT ABOVE + 0x02db: 0xb2, # OGONEK } \ No newline at end of file Index: iso8859_5.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_5.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_5.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_5.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-5.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-5.TXT' with gencodec.py. """#" @@ -28,624 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1112 lines suppressed...] 
+ 0x044d: 0xed, # CYRILLIC SMALL LETTER E + 0x044e: 0xee, # CYRILLIC SMALL LETTER YU + 0x044f: 0xef, # CYRILLIC SMALL LETTER YA + 0x0451: 0xf1, # CYRILLIC SMALL LETTER IO + 0x0452: 0xf2, # CYRILLIC SMALL LETTER DJE + 0x0453: 0xf3, # CYRILLIC SMALL LETTER GJE + 0x0454: 0xf4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0xf5, # CYRILLIC SMALL LETTER DZE + 0x0456: 0xf6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xf7, # CYRILLIC SMALL LETTER YI + 0x0458: 0xf8, # CYRILLIC SMALL LETTER JE + 0x0459: 0xf9, # CYRILLIC SMALL LETTER LJE + 0x045a: 0xfa, # CYRILLIC SMALL LETTER NJE + 0x045b: 0xfb, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0xfc, # CYRILLIC SMALL LETTER KJE + 0x045e: 0xfe, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0xff, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0xf0, # NUMERO SIGN } \ No newline at end of file Index: iso8859_6.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_6.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_6.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_6.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-6.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py. 
"""#" @@ -28,273 +28,175 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x00a1: None, - 0x00a2: None, - 0x00a3: None, - 0x00a5: None, - 0x00a6: None, - 0x00a7: None, - 0x00a8: None, - 0x00a9: None, - 0x00aa: None, - 0x00ab: None, - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ae: None, - 0x00af: None, - 0x00b0: None, - 0x00b1: None, - 0x00b2: None, - 0x00b3: None, - 0x00b4: None, - 0x00b5: None, - 0x00b6: None, - 0x00b7: None, - 0x00b8: None, - 0x00b9: None, - 0x00ba: None, - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: None, - 0x00bd: None, - 0x00be: None, - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: None, - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d7: 0x0637, # ARABIC LETTER TAH - 0x00d8: 0x0638, # ARABIC LETTER ZAH - 0x00d9: 0x0639, # ARABIC LETTER AIN - 0x00da: 0x063a, # ARABIC LETTER GHAIN - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: None, - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0x0641, # ARABIC LETTER FEH - 0x00e2: 
0x0642, # ARABIC LETTER QAF - 0x00e3: 0x0643, # ARABIC LETTER KAF - 0x00e4: 0x0644, # ARABIC LETTER LAM - 0x00e5: 0x0645, # ARABIC LETTER MEEM - 0x00e6: 0x0646, # ARABIC LETTER NOON - 0x00e7: 0x0647, # ARABIC LETTER HEH - 0x00e8: 0x0648, # ARABIC LETTER WAW - 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ea: 0x064a, # ARABIC LETTER YEH - 0x00eb: 0x064b, # ARABIC FATHATAN - 0x00ec: 0x064c, # ARABIC DAMMATAN - 0x00ed: 0x064d, # ARABIC KASRATAN - 0x00ee: 0x064e, # ARABIC FATHA - 0x00ef: 0x064f, # ARABIC DAMMA - 0x00f0: 0x0650, # ARABIC KASRA - 0x00f1: 0x0651, # ARABIC SHADDA - 0x00f2: 0x0652, # ARABIC SUKUN - 0x00f3: None, - 0x00f4: None, - 0x00f5: None, - 0x00f6: None, - 0x00f7: None, - 0x00f8: None, - 0x00f9: None, - 0x00fa: None, - 0x00fb: None, - 0x00fc: None, - 0x00fd: None, - 0x00fe: None, - 0x00ff: None, -}) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' 
# 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL 
LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\x80' # 0x0080 -> - u'\x81' # 0x0081 -> - u'\x82' # 0x0082 -> - u'\x83' # 0x0083 -> - u'\x84' # 0x0084 -> - u'\x85' # 0x0085 -> - u'\x86' # 0x0086 -> - u'\x87' # 0x0087 -> - u'\x88' # 0x0088 -> - u'\x89' # 0x0089 -> - u'\x8a' # 0x008a -> - u'\x8b' # 0x008b -> - u'\x8c' # 0x008c -> - u'\x8d' # 0x008d -> - u'\x8e' # 0x008e -> - u'\x8f' # 0x008f -> - u'\x90' # 0x0090 -> - u'\x91' # 0x0091 -> - u'\x92' # 0x0092 -> - u'\x93' # 0x0093 -> - 
u'\x94' # 0x0094 -> - u'\x95' # 0x0095 -> - u'\x96' # 0x0096 -> - u'\x97' # 0x0097 -> - u'\x98' # 0x0098 -> - u'\x99' # 0x0099 -> - u'\x9a' # 0x009a -> - u'\x9b' # 0x009b -> - u'\x9c' # 0x009c -> - u'\x9d' # 0x009d -> - u'\x9e' # 0x009e -> - u'\x9f' # 0x009f -> - u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0a -> LINE FEED + u'\x0b' # 0x0b -> VERTICAL TABULATION + u'\x0c' # 0x0c -> FORM FEED + u'\r' # 0x0d -> CARRIAGE RETURN + u'\x0e' # 0x0e -> SHIFT OUT + u'\x0f' # 0x0f -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1a -> SUBSTITUTE + u'\x1b' # 0x1b -> ESCAPE + u'\x1c' # 0x1c -> FILE SEPARATOR + u'\x1d' # 0x1d -> GROUP SEPARATOR + u'\x1e' # 0x1e -> RECORD SEPARATOR + u'\x1f' # 0x1f -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2a -> ASTERISK + u'+' # 0x2b -> PLUS SIGN + u',' # 0x2c -> COMMA + u'-' # 0x2d -> HYPHEN-MINUS + u'.' 
# 0x2e -> FULL STOP + u'/' # 0x2f -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3a -> COLON + u';' # 0x3b -> SEMICOLON + u'<' # 0x3c -> LESS-THAN SIGN + u'=' # 0x3d -> EQUALS SIGN + u'>' # 0x3e -> GREATER-THAN SIGN + u'?' # 0x3f -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4a -> LATIN CAPITAL LETTER J + u'K' # 0x4b -> LATIN CAPITAL LETTER K + u'L' # 0x4c -> LATIN CAPITAL LETTER L + u'M' # 0x4d -> LATIN CAPITAL LETTER M + u'N' # 0x4e -> LATIN CAPITAL LETTER N + u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5a -> LATIN CAPITAL LETTER Z + u'[' # 0x5b -> LEFT SQUARE BRACKET + u'\\' # 0x5c -> REVERSE SOLIDUS + u']' # 0x5d -> RIGHT SQUARE BRACKET + u'^' # 0x5e -> CIRCUMFLEX ACCENT + u'_' # 0x5f -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER 
F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6a -> LATIN SMALL LETTER J + u'k' # 0x6b -> LATIN SMALL LETTER K + u'l' # 0x6c -> LATIN SMALL LETTER L + u'm' # 0x6d -> LATIN SMALL LETTER M + u'n' # 0x6e -> LATIN SMALL LETTER N + u'o' # 0x6f -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7a -> LATIN SMALL LETTER Z + u'{' # 0x7b -> LEFT CURLY BRACKET + u'|' # 0x7c -> VERTICAL LINE + u'}' # 0x7d -> RIGHT CURLY BRACKET + u'~' # 0x7e -> TILDE + u'\x7f' # 0x7f -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8a -> + u'\x8b' # 0x8b -> + u'\x8c' # 0x8c -> + u'\x8d' # 0x8d -> + u'\x8e' # 0x8e -> + u'\x8f' # 0x8f -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9a -> + u'\x9b' # 0x9b -> + u'\x9c' # 0x9c -> + u'\x9d' # 0x9d -> + u'\x9e' # 0x9e -> + u'\x9f' # 0x9f -> + u'\xa0' # 0xa0 -> NO-BREAK SPACE u'\ufffe' u'\ufffe' u'\ufffe' - u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\xa4' # 0xa4 -> CURRENCY SIGN u'\ufffe' u'\ufffe' u'\ufffe' @@ -302,8 +204,8 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u060c' # 0x00ac -> ARABIC COMMA - u'\xad' # 0x00ad -> SOFT HYPHEN + u'\u060c' # 0xac -> ARABIC COMMA + u'\xad' # 0xad -> SOFT HYPHEN u'\ufffe' u'\ufffe' u'\ufffe' @@ -317,62 +219,62 @@ u'\ufffe' u'\ufffe' u'\ufffe' - 
u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'\u061b' # 0xbb -> ARABIC SEMICOLON u'\ufffe' u'\ufffe' u'\ufffe' - u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\u061f' # 0xbf -> ARABIC QUESTION MARK u'\ufffe' - u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA - u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF - u'\u0628' # 0x00c8 -> ARABIC LETTER BEH - u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0x00ca -> ARABIC LETTER TEH - u'\u062b' # 0x00cb -> ARABIC LETTER THEH - u'\u062c' # 0x00cc -> ARABIC LETTER JEEM - u'\u062d' # 0x00cd -> ARABIC LETTER HAH - u'\u062e' # 0x00ce -> ARABIC LETTER KHAH - u'\u062f' # 0x00cf -> ARABIC LETTER DAL - u'\u0630' # 0x00d0 -> ARABIC LETTER THAL - u'\u0631' # 0x00d1 -> ARABIC LETTER REH - u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN - u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN - u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN - u'\u0635' # 0x00d5 -> ARABIC LETTER SAD - u'\u0636' # 0x00d6 -> ARABIC LETTER DAD - u'\u0637' # 0x00d7 -> ARABIC LETTER TAH - u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH - u'\u0639' # 0x00d9 -> ARABIC LETTER AIN - u'\u063a' # 0x00da -> ARABIC LETTER GHAIN + u'\u0621' # 0xc1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xc2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xc3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xc4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xc5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xc6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xc7 -> ARABIC LETTER ALEF + u'\u0628' # 0xc8 -> ARABIC LETTER BEH + u'\u0629' # 0xc9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xca -> ARABIC LETTER TEH + u'\u062b' # 0xcb -> ARABIC LETTER THEH + u'\u062c' # 0xcc -> ARABIC LETTER JEEM + u'\u062d' 
# 0xcd -> ARABIC LETTER HAH + u'\u062e' # 0xce -> ARABIC LETTER KHAH + u'\u062f' # 0xcf -> ARABIC LETTER DAL + u'\u0630' # 0xd0 -> ARABIC LETTER THAL + u'\u0631' # 0xd1 -> ARABIC LETTER REH + u'\u0632' # 0xd2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xd3 -> ARABIC LETTER SEEN + u'\u0634' # 0xd4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xd5 -> ARABIC LETTER SAD + u'\u0636' # 0xd6 -> ARABIC LETTER DAD + u'\u0637' # 0xd7 -> ARABIC LETTER TAH + u'\u0638' # 0xd8 -> ARABIC LETTER ZAH + u'\u0639' # 0xd9 -> ARABIC LETTER AIN + u'\u063a' # 0xda -> ARABIC LETTER GHAIN u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' - u'\u0640' # 0x00e0 -> ARABIC TATWEEL - u'\u0641' # 0x00e1 -> ARABIC LETTER FEH - u'\u0642' # 0x00e2 -> ARABIC LETTER QAF - u'\u0643' # 0x00e3 -> ARABIC LETTER KAF - u'\u0644' # 0x00e4 -> ARABIC LETTER LAM - u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM - u'\u0646' # 0x00e6 -> ARABIC LETTER NOON - u'\u0647' # 0x00e7 -> ARABIC LETTER HEH - u'\u0648' # 0x00e8 -> ARABIC LETTER WAW - u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0x00ea -> ARABIC LETTER YEH - u'\u064b' # 0x00eb -> ARABIC FATHATAN - u'\u064c' # 0x00ec -> ARABIC DAMMATAN - u'\u064d' # 0x00ed -> ARABIC KASRATAN - u'\u064e' # 0x00ee -> ARABIC FATHA - u'\u064f' # 0x00ef -> ARABIC DAMMA - u'\u0650' # 0x00f0 -> ARABIC KASRA - u'\u0651' # 0x00f1 -> ARABIC SHADDA - u'\u0652' # 0x00f2 -> ARABIC SUKUN + u'\u0640' # 0xe0 -> ARABIC TATWEEL + u'\u0641' # 0xe1 -> ARABIC LETTER FEH + u'\u0642' # 0xe2 -> ARABIC LETTER QAF + u'\u0643' # 0xe3 -> ARABIC LETTER KAF + u'\u0644' # 0xe4 -> ARABIC LETTER LAM + u'\u0645' # 0xe5 -> ARABIC LETTER MEEM + u'\u0646' # 0xe6 -> ARABIC LETTER NOON + u'\u0647' # 0xe7 -> ARABIC LETTER HEH + u'\u0648' # 0xe8 -> ARABIC LETTER WAW + u'\u0649' # 0xe9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xea -> ARABIC LETTER YEH + u'\u064b' # 0xeb -> ARABIC FATHATAN + u'\u064c' # 0xec -> ARABIC DAMMATAN + u'\u064d' # 0xed -> ARABIC KASRATAN + u'\u064e' # 0xee -> ARABIC FATHA + u'\u064f' # 0xef 
-> ARABIC DAMMA + u'\u0650' # 0xf0 -> ARABIC KASRA + u'\u0651' # 0xf1 -> ARABIC SHADDA + u'\u0652' # 0xf2 -> ARABIC SUKUN u'\ufffe' u'\ufffe' u'\ufffe' @@ -391,215 +293,215 @@ ### Encoding Map encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT 
FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, 
# LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x0080: 0x0080, # - 0x0081: 0x0081, # - 0x0082: 0x0082, # - 0x0083: 0x0083, # - 0x0084: 0x0084, # - 0x0085: 0x0085, # - 0x0086: 0x0086, # - 0x0087: 0x0087, # - 0x0088: 0x0088, # - 0x0089: 0x0089, # - 0x008a: 0x008a, # - 0x008b: 0x008b, # - 0x008c: 0x008c, # - 0x008d: 0x008d, # - 0x008e: 0x008e, # - 0x008f: 0x008f, # - 0x0090: 0x0090, # - 0x0091: 0x0091, # - 0x0092: 0x0092, # - 0x0093: 0x0093, # - 0x0094: 0x0094, # - 0x0095: 0x0095, # - 0x0096: 0x0096, # - 0x0097: 0x0097, # - 0x0098: 0x0098, # - 0x0099: 0x0099, # - 0x009a: 0x009a, # - 0x009b: 0x009b, # - 0x009c: 0x009c, # - 0x009d: 0x009d, # - 0x009e: 0x009e, # - 0x009f: 0x009f, # - 0x00a0: 0x00a0, # NO-BREAK SPACE - 0x00a4: 0x00a4, # CURRENCY SIGN - 0x00ad: 0x00ad, # SOFT HYPHEN - 0x060c: 0x00ac, # ARABIC COMMA - 0x061b: 0x00bb, # ARABIC SEMICOLON - 0x061f: 0x00bf, # ARABIC QUESTION MARK - 0x0621: 0x00c1, # ARABIC LETTER HAMZA - 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 
0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0x00c7, # ARABIC LETTER ALEF - 0x0628: 0x00c8, # ARABIC LETTER BEH - 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA - 0x062a: 0x00ca, # ARABIC LETTER TEH - 0x062b: 0x00cb, # ARABIC LETTER THEH - 0x062c: 0x00cc, # ARABIC LETTER JEEM - 0x062d: 0x00cd, # ARABIC LETTER HAH - 0x062e: 0x00ce, # ARABIC LETTER KHAH - 0x062f: 0x00cf, # ARABIC LETTER DAL - 0x0630: 0x00d0, # ARABIC LETTER THAL - 0x0631: 0x00d1, # ARABIC LETTER REH - 0x0632: 0x00d2, # ARABIC LETTER ZAIN - 0x0633: 0x00d3, # ARABIC LETTER SEEN - 0x0634: 0x00d4, # ARABIC LETTER SHEEN - 0x0635: 0x00d5, # ARABIC LETTER SAD - 0x0636: 0x00d6, # ARABIC LETTER DAD - 0x0637: 0x00d7, # ARABIC LETTER TAH - 0x0638: 0x00d8, # ARABIC LETTER ZAH - 0x0639: 0x00d9, # ARABIC LETTER AIN - 0x063a: 0x00da, # ARABIC LETTER GHAIN - 0x0640: 0x00e0, # ARABIC TATWEEL - 0x0641: 0x00e1, # ARABIC LETTER FEH - 0x0642: 0x00e2, # ARABIC LETTER QAF - 0x0643: 0x00e3, # ARABIC LETTER KAF - 0x0644: 0x00e4, # ARABIC LETTER LAM - 0x0645: 0x00e5, # ARABIC LETTER MEEM - 0x0646: 0x00e6, # ARABIC LETTER NOON - 0x0647: 0x00e7, # ARABIC LETTER HEH - 0x0648: 0x00e8, # ARABIC LETTER WAW - 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA - 0x064a: 0x00ea, # ARABIC LETTER YEH - 0x064b: 0x00eb, # ARABIC FATHATAN - 0x064c: 0x00ec, # ARABIC DAMMATAN - 0x064d: 0x00ed, # ARABIC KASRATAN - 0x064e: 0x00ee, # ARABIC FATHA - 0x064f: 0x00ef, # ARABIC DAMMA - 0x0650: 0x00f0, # ARABIC KASRA - 0x0651: 0x00f1, # ARABIC SHADDA - 0x0652: 0x00f2, # ARABIC SUKUN + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000a: 0x0a, # LINE FEED + 0x000b: 0x0b, # VERTICAL TABULATION + 0x000c: 0x0c, # FORM FEED + 0x000d: 0x0d, # 
CARRIAGE RETURN + 0x000e: 0x0e, # SHIFT OUT + 0x000f: 0x0f, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001a: 0x1a, # SUBSTITUTE + 0x001b: 0x1b, # ESCAPE + 0x001c: 0x1c, # FILE SEPARATOR + 0x001d: 0x1d, # GROUP SEPARATOR + 0x001e: 0x1e, # RECORD SEPARATOR + 0x001f: 0x1f, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002a: 0x2a, # ASTERISK + 0x002b: 0x2b, # PLUS SIGN + 0x002c: 0x2c, # COMMA + 0x002d: 0x2d, # HYPHEN-MINUS + 0x002e: 0x2e, # FULL STOP + 0x002f: 0x2f, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003a: 0x3a, # COLON + 0x003b: 0x3b, # SEMICOLON + 0x003c: 0x3c, # LESS-THAN SIGN + 0x003d: 0x3d, # EQUALS SIGN + 0x003e: 0x3e, # GREATER-THAN SIGN + 0x003f: 0x3f, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004a: 0x4a, # LATIN 
CAPITAL LETTER J + 0x004b: 0x4b, # LATIN CAPITAL LETTER K + 0x004c: 0x4c, # LATIN CAPITAL LETTER L + 0x004d: 0x4d, # LATIN CAPITAL LETTER M + 0x004e: 0x4e, # LATIN CAPITAL LETTER N + 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005a: 0x5a, # LATIN CAPITAL LETTER Z + 0x005b: 0x5b, # LEFT SQUARE BRACKET + 0x005c: 0x5c, # REVERSE SOLIDUS + 0x005d: 0x5d, # RIGHT SQUARE BRACKET + 0x005e: 0x5e, # CIRCUMFLEX ACCENT + 0x005f: 0x5f, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006a: 0x6a, # LATIN SMALL LETTER J + 0x006b: 0x6b, # LATIN SMALL LETTER K + 0x006c: 0x6c, # LATIN SMALL LETTER L + 0x006d: 0x6d, # LATIN SMALL LETTER M + 0x006e: 0x6e, # LATIN SMALL LETTER N + 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007a: 0x7a, # LATIN SMALL LETTER Z + 0x007b: 0x7b, # LEFT CURLY BRACKET + 0x007c: 0x7c, # VERTICAL LINE + 0x007d: 0x7d, # RIGHT CURLY BRACKET 
+ 0x007e: 0x7e, # TILDE + 0x007f: 0x7f, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008a: 0x8a, # + 0x008b: 0x8b, # + 0x008c: 0x8c, # + 0x008d: 0x8d, # + 0x008e: 0x8e, # + 0x008f: 0x8f, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009a: 0x9a, # + 0x009b: 0x9b, # + 0x009c: 0x9c, # + 0x009d: 0x9d, # + 0x009e: 0x9e, # + 0x009f: 0x9f, # + 0x00a0: 0xa0, # NO-BREAK SPACE + 0x00a4: 0xa4, # CURRENCY SIGN + 0x00ad: 0xad, # SOFT HYPHEN + 0x060c: 0xac, # ARABIC COMMA + 0x061b: 0xbb, # ARABIC SEMICOLON + 0x061f: 0xbf, # ARABIC QUESTION MARK + 0x0621: 0xc1, # ARABIC LETTER HAMZA + 0x0622: 0xc2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0xc3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0xc4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0xc5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0xc6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0xc7, # ARABIC LETTER ALEF + 0x0628: 0xc8, # ARABIC LETTER BEH + 0x0629: 0xc9, # ARABIC LETTER TEH MARBUTA + 0x062a: 0xca, # ARABIC LETTER TEH + 0x062b: 0xcb, # ARABIC LETTER THEH + 0x062c: 0xcc, # ARABIC LETTER JEEM + 0x062d: 0xcd, # ARABIC LETTER HAH + 0x062e: 0xce, # ARABIC LETTER KHAH + 0x062f: 0xcf, # ARABIC LETTER DAL + 0x0630: 0xd0, # ARABIC LETTER THAL + 0x0631: 0xd1, # ARABIC LETTER REH + 0x0632: 0xd2, # ARABIC LETTER ZAIN + 0x0633: 0xd3, # ARABIC LETTER SEEN + 0x0634: 0xd4, # ARABIC LETTER SHEEN + 0x0635: 0xd5, # ARABIC LETTER SAD + 0x0636: 0xd6, # ARABIC LETTER DAD + 0x0637: 0xd7, # ARABIC LETTER TAH + 0x0638: 0xd8, # ARABIC LETTER ZAH + 0x0639: 0xd9, # ARABIC LETTER AIN + 0x063a: 0xda, # ARABIC LETTER GHAIN + 0x0640: 0xe0, # ARABIC TATWEEL + 0x0641: 0xe1, # ARABIC LETTER FEH + 0x0642: 0xe2, # ARABIC LETTER QAF + 0x0643: 0xe3, # ARABIC 
LETTER KAF + 0x0644: 0xe4, # ARABIC LETTER LAM + 0x0645: 0xe5, # ARABIC LETTER MEEM + 0x0646: 0xe6, # ARABIC LETTER NOON + 0x0647: 0xe7, # ARABIC LETTER HEH + 0x0648: 0xe8, # ARABIC LETTER WAW + 0x0649: 0xe9, # ARABIC LETTER ALEF MAKSURA + 0x064a: 0xea, # ARABIC LETTER YEH + 0x064b: 0xeb, # ARABIC FATHATAN + 0x064c: 0xec, # ARABIC DAMMATAN + 0x064d: 0xed, # ARABIC KASRATAN + 0x064e: 0xee, # ARABIC FATHA + 0x064f: 0xef, # ARABIC DAMMA + 0x0650: 0xf0, # ARABIC KASRA + 0x0651: 0xf1, # ARABIC SHADDA + 0x0652: 0xf2, # ARABIC SUKUN } \ No newline at end of file Index: iso8859_7.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_7.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_7.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_7.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-7.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-7.TXT' with gencodec.py. """#" @@ -28,607 +28,522 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1089 lines suppressed...] 
+ 0x03c2: 0xf2, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0xf3, # GREEK SMALL LETTER SIGMA + 0x03c4: 0xf4, # GREEK SMALL LETTER TAU + 0x03c5: 0xf5, # GREEK SMALL LETTER UPSILON + 0x03c6: 0xf6, # GREEK SMALL LETTER PHI + 0x03c7: 0xf7, # GREEK SMALL LETTER CHI + 0x03c8: 0xf8, # GREEK SMALL LETTER PSI + 0x03c9: 0xf9, # GREEK SMALL LETTER OMEGA + 0x03ca: 0xfa, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0xfb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0xfc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0xfd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0xfe, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0xaf, # HORIZONTAL BAR + 0x2018: 0xa1, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xa2, # RIGHT SINGLE QUOTATION MARK + 0x20ac: 0xa4, # EURO SIGN + 0x20af: 0xa5, # DRACHMA SIGN } \ No newline at end of file Index: iso8859_8.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_8.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_8.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_8.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-8.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-8.TXT' with gencodec.py. 
"""#" @@ -28,274 +28,201 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x00a1: None, - 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00ba: 0x00f7, # DIVISION SIGN - 0x00bf: None, - 0x00c0: None, - 0x00c1: None, - 0x00c2: None, - 0x00c3: None, - 0x00c4: None, - 0x00c5: None, - 0x00c6: None, - 0x00c7: None, - 0x00c8: None, - 0x00c9: None, - 0x00ca: None, - 0x00cb: None, - 0x00cc: None, - 0x00cd: None, - 0x00ce: None, - 0x00cf: None, - 0x00d0: None, - 0x00d1: None, - 0x00d2: None, - 0x00d3: None, - 0x00d4: None, - 0x00d5: None, - 0x00d6: None, - 0x00d7: None, - 0x00d8: None, - 0x00d9: None, - 0x00da: None, - 0x00db: None, - 0x00dc: None, - 0x00dd: None, - 0x00de: None, - 0x00df: 0x2017, # DOUBLE LOW LINE - 0x00e0: 0x05d0, # HEBREW LETTER ALEF - 0x00e1: 0x05d1, # HEBREW LETTER BET - 0x00e2: 0x05d2, # HEBREW LETTER GIMEL - 0x00e3: 0x05d3, # HEBREW LETTER DALET - 0x00e4: 0x05d4, # HEBREW LETTER HE - 0x00e5: 0x05d5, # HEBREW LETTER VAV - 0x00e6: 0x05d6, # HEBREW LETTER ZAYIN - 0x00e7: 0x05d7, # HEBREW LETTER HET - 0x00e8: 0x05d8, # HEBREW LETTER TET - 0x00e9: 0x05d9, # HEBREW LETTER YOD - 0x00ea: 0x05da, # HEBREW LETTER FINAL KAF - 0x00eb: 0x05db, # HEBREW LETTER KAF - 0x00ec: 0x05dc, # HEBREW LETTER LAMED - 0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM - 0x00ee: 0x05de, # HEBREW LETTER MEM - 0x00ef: 0x05df, # HEBREW LETTER FINAL NUN - 0x00f0: 0x05e0, # HEBREW LETTER NUN - 0x00f1: 0x05e1, # HEBREW LETTER SAMEKH - 0x00f2: 0x05e2, # HEBREW LETTER AYIN - 0x00f3: 0x05e3, # HEBREW LETTER FINAL PE - 0x00f4: 0x05e4, # HEBREW LETTER PE - 0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x00f6: 0x05e6, # HEBREW LETTER TSADI - 0x00f7: 0x05e7, # HEBREW LETTER QOF - 0x00f8: 0x05e8, # HEBREW LETTER RESH - 0x00f9: 0x05e9, # HEBREW LETTER SHIN - 0x00fa: 0x05ea, # HEBREW LETTER TAV - 0x00fb: None, - 0x00fc: None, - 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK - 0x00fe: 0x200f, # 
RIGHT-TO-LEFT MARK - 0x00ff: None, -}) ### Decoding Table decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' 
# 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - 
u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\x80' # 0x0080 -> - u'\x81' # 0x0081 -> - u'\x82' # 0x0082 -> - u'\x83' # 0x0083 -> - u'\x84' # 0x0084 -> - u'\x85' # 0x0085 -> - u'\x86' # 0x0086 -> - u'\x87' # 0x0087 -> - u'\x88' # 0x0088 -> - u'\x89' # 0x0089 -> - u'\x8a' # 0x008a -> - u'\x8b' # 0x008b -> - u'\x8c' # 0x008c -> - u'\x8d' # 0x008d -> - u'\x8e' # 0x008e -> - u'\x8f' # 0x008f -> - u'\x90' # 0x0090 -> - u'\x91' # 0x0091 -> - u'\x92' # 0x0092 -> - u'\x93' # 0x0093 -> - u'\x94' # 0x0094 -> - u'\x95' # 0x0095 -> - u'\x96' # 0x0096 -> - u'\x97' # 0x0097 -> - u'\x98' # 0x0098 -> - u'\x99' # 0x0099 -> - u'\x9a' # 0x009a -> - u'\x9b' # 0x009b -> - u'\x9c' # 0x009c -> - u'\x9d' # 0x009d -> - u'\x9e' # 0x009e -> - u'\x9f' # 0x009f -> - u'\xa0' # 0x00a0 -> NO-BREAK SPACE + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + 
u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0a -> LINE FEED + u'\x0b' # 0x0b -> VERTICAL TABULATION + u'\x0c' # 0x0c -> FORM FEED + u'\r' # 0x0d -> CARRIAGE RETURN + u'\x0e' # 0x0e -> SHIFT OUT + u'\x0f' # 0x0f -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1a -> SUBSTITUTE + u'\x1b' # 0x1b -> ESCAPE + u'\x1c' # 0x1c -> FILE SEPARATOR + u'\x1d' # 0x1d -> GROUP SEPARATOR + u'\x1e' # 0x1e -> RECORD SEPARATOR + u'\x1f' # 0x1f -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2a -> ASTERISK + u'+' # 0x2b -> PLUS SIGN + u',' # 0x2c -> COMMA + u'-' # 0x2d -> HYPHEN-MINUS + u'.' # 0x2e -> FULL STOP + u'/' # 0x2f -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3a -> COLON + u';' # 0x3b -> SEMICOLON + u'<' # 0x3c -> LESS-THAN SIGN + u'=' # 0x3d -> EQUALS SIGN + u'>' # 0x3e -> GREATER-THAN SIGN + u'?' 
# 0x3f -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4a -> LATIN CAPITAL LETTER J + u'K' # 0x4b -> LATIN CAPITAL LETTER K + u'L' # 0x4c -> LATIN CAPITAL LETTER L + u'M' # 0x4d -> LATIN CAPITAL LETTER M + u'N' # 0x4e -> LATIN CAPITAL LETTER N + u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5a -> LATIN CAPITAL LETTER Z + u'[' # 0x5b -> LEFT SQUARE BRACKET + u'\\' # 0x5c -> REVERSE SOLIDUS + u']' # 0x5d -> RIGHT SQUARE BRACKET + u'^' # 0x5e -> CIRCUMFLEX ACCENT + u'_' # 0x5f -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6a -> LATIN SMALL LETTER J + u'k' # 0x6b -> LATIN SMALL LETTER K + u'l' # 0x6c -> LATIN SMALL LETTER L + u'm' # 0x6d -> LATIN SMALL LETTER M + u'n' # 0x6e -> LATIN SMALL LETTER N + u'o' # 0x6f -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> 
LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7a -> LATIN SMALL LETTER Z + u'{' # 0x7b -> LEFT CURLY BRACKET + u'|' # 0x7c -> VERTICAL LINE + u'}' # 0x7d -> RIGHT CURLY BRACKET + u'~' # 0x7e -> TILDE + u'\x7f' # 0x7f -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8a -> + u'\x8b' # 0x8b -> + u'\x8c' # 0x8c -> + u'\x8d' # 0x8d -> + u'\x8e' # 0x8e -> + u'\x8f' # 0x8f -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9a -> + u'\x9b' # 0x9b -> + u'\x9c' # 0x9c -> + u'\x9d' # 0x9d -> + u'\x9e' # 0x9e -> + u'\x9f' # 0x9f -> + u'\xa0' # 0xa0 -> NO-BREAK SPACE u'\ufffe' - u'\xa2' # 0x00a2 -> CENT SIGN - u'\xa3' # 0x00a3 -> POUND SIGN - u'\xa4' # 0x00a4 -> CURRENCY SIGN - u'\xa5' # 0x00a5 -> YEN SIGN - u'\xa6' # 0x00a6 -> BROKEN BAR - u'\xa7' # 0x00a7 -> SECTION SIGN - u'\xa8' # 0x00a8 -> DIAERESIS - u'\xa9' # 0x00a9 -> COPYRIGHT SIGN - u'\xd7' # 0x00aa -> MULTIPLICATION SIGN - u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0x00ac -> NOT SIGN - u'\xad' # 0x00ad -> SOFT HYPHEN - u'\xae' # 0x00ae -> REGISTERED SIGN - u'\xaf' # 0x00af -> MACRON - u'\xb0' # 0x00b0 -> DEGREE SIGN - u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN - u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO - u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE - u'\xb4' # 0x00b4 -> ACUTE ACCENT - u'\xb5' # 0x00b5 -> MICRO SIGN - u'\xb6' # 0x00b6 -> PILCROW SIGN - u'\xb7' # 0x00b7 -> MIDDLE DOT - u'\xb8' # 0x00b8 -> CEDILLA - u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE - u'\xf7' # 
0x00ba -> DIVISION SIGN - u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS + u'\xa2' # 0xa2 -> CENT SIGN + u'\xa3' # 0xa3 -> POUND SIGN + u'\xa4' # 0xa4 -> CURRENCY SIGN + u'\xa5' # 0xa5 -> YEN SIGN + u'\xa6' # 0xa6 -> BROKEN BAR + u'\xa7' # 0xa7 -> SECTION SIGN + u'\xa8' # 0xa8 -> DIAERESIS + u'\xa9' # 0xa9 -> COPYRIGHT SIGN + u'\xd7' # 0xaa -> MULTIPLICATION SIGN + u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xac -> NOT SIGN + u'\xad' # 0xad -> SOFT HYPHEN + u'\xae' # 0xae -> REGISTERED SIGN + u'\xaf' # 0xaf -> MACRON + u'\xb0' # 0xb0 -> DEGREE SIGN + u'\xb1' # 0xb1 -> PLUS-MINUS SIGN + u'\xb2' # 0xb2 -> SUPERSCRIPT TWO + u'\xb3' # 0xb3 -> SUPERSCRIPT THREE + u'\xb4' # 0xb4 -> ACUTE ACCENT + u'\xb5' # 0xb5 -> MICRO SIGN + u'\xb6' # 0xb6 -> PILCROW SIGN + u'\xb7' # 0xb7 -> MIDDLE DOT + u'\xb8' # 0xb8 -> CEDILLA + u'\xb9' # 0xb9 -> SUPERSCRIPT ONE + u'\xf7' # 0xba -> DIVISION SIGN + u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS u'\ufffe' u'\ufffe' u'\ufffe' @@ -328,262 +255,262 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u2017' # 0x00df -> DOUBLE LOW LINE - u'\u05d0' # 0x00e0 -> HEBREW LETTER ALEF - u'\u05d1' # 0x00e1 -> HEBREW LETTER BET - u'\u05d2' # 0x00e2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x00e3 -> HEBREW LETTER DALET - u'\u05d4' # 0x00e4 -> HEBREW LETTER HE - u'\u05d5' # 0x00e5 -> HEBREW LETTER VAV - u'\u05d6' # 0x00e6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x00e7 -> HEBREW LETTER HET - u'\u05d8' # 0x00e8 -> HEBREW LETTER TET - u'\u05d9' # 0x00e9 -> HEBREW LETTER YOD - u'\u05da' # 0x00ea -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x00eb -> HEBREW LETTER KAF - u'\u05dc' # 0x00ec -> HEBREW LETTER LAMED - u'\u05dd' # 0x00ed 
-> HEBREW LETTER FINAL MEM - u'\u05de' # 0x00ee -> HEBREW LETTER MEM - u'\u05df' # 0x00ef -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x00f0 -> HEBREW LETTER NUN - u'\u05e1' # 0x00f1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0x00f2 -> HEBREW LETTER AYIN - u'\u05e3' # 0x00f3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x00f4 -> HEBREW LETTER PE - u'\u05e5' # 0x00f5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x00f6 -> HEBREW LETTER TSADI - u'\u05e7' # 0x00f7 -> HEBREW LETTER QOF - u'\u05e8' # 0x00f8 -> HEBREW LETTER RESH - u'\u05e9' # 0x00f9 -> HEBREW LETTER SHIN - u'\u05ea' # 0x00fa -> HEBREW LETTER TAV + u'\u2017' # 0xdf -> DOUBLE LOW LINE + u'\u05d0' # 0xe0 -> HEBREW LETTER ALEF + u'\u05d1' # 0xe1 -> HEBREW LETTER BET + u'\u05d2' # 0xe2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0xe3 -> HEBREW LETTER DALET + u'\u05d4' # 0xe4 -> HEBREW LETTER HE + u'\u05d5' # 0xe5 -> HEBREW LETTER VAV + u'\u05d6' # 0xe6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0xe7 -> HEBREW LETTER HET + u'\u05d8' # 0xe8 -> HEBREW LETTER TET + u'\u05d9' # 0xe9 -> HEBREW LETTER YOD + u'\u05da' # 0xea -> HEBREW LETTER FINAL KAF + u'\u05db' # 0xeb -> HEBREW LETTER KAF + u'\u05dc' # 0xec -> HEBREW LETTER LAMED + u'\u05dd' # 0xed -> HEBREW LETTER FINAL MEM + u'\u05de' # 0xee -> HEBREW LETTER MEM + u'\u05df' # 0xef -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0xf0 -> HEBREW LETTER NUN + u'\u05e1' # 0xf1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0xf2 -> HEBREW LETTER AYIN + u'\u05e3' # 0xf3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0xf4 -> HEBREW LETTER PE + u'\u05e5' # 0xf5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0xf6 -> HEBREW LETTER TSADI + u'\u05e7' # 0xf7 -> HEBREW LETTER QOF + u'\u05e8' # 0xf8 -> HEBREW LETTER RESH + u'\u05e9' # 0xf9 -> HEBREW LETTER SHIN + u'\u05ea' # 0xfa -> HEBREW LETTER TAV u'\ufffe' u'\ufffe' - u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK - u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK + u'\u200e' # 0xfd -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xfe -> RIGHT-TO-LEFT MARK u'\ufffe' ) ### Encoding Map encoding_map = { - 
0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # 
LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # 
LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x0080: 0x0080, # - 0x0081: 0x0081, # - 0x0082: 0x0082, # - 0x0083: 0x0083, # - 0x0084: 0x0084, # - 0x0085: 0x0085, # - 0x0086: 0x0086, # - 0x0087: 0x0087, # - 0x0088: 0x0088, # - 0x0089: 0x0089, # - 0x008a: 0x008a, # - 0x008b: 0x008b, # - 0x008c: 0x008c, # - 0x008d: 0x008d, # - 0x008e: 0x008e, # - 0x008f: 0x008f, # - 0x0090: 0x0090, # - 0x0091: 0x0091, # - 0x0092: 0x0092, # - 0x0093: 0x0093, # - 0x0094: 0x0094, # - 0x0095: 0x0095, # - 0x0096: 0x0096, # - 0x0097: 0x0097, # - 0x0098: 0x0098, # - 0x0099: 0x0099, # - 0x009a: 0x009a, # - 0x009b: 0x009b, # - 0x009c: 0x009c, # - 0x009d: 0x009d, # - 0x009e: 0x009e, # - 0x009f: 0x009f, # - 0x00a0: 0x00a0, # NO-BREAK SPACE - 0x00a2: 0x00a2, # CENT SIGN - 0x00a3: 0x00a3, # POUND SIGN - 0x00a4: 0x00a4, # CURRENCY SIGN - 0x00a5: 0x00a5, # YEN SIGN - 0x00a6: 0x00a6, # BROKEN BAR - 0x00a7: 0x00a7, # SECTION SIGN - 0x00a8: 0x00a8, # DIAERESIS - 0x00a9: 0x00a9, # COPYRIGHT SIGN - 0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00ac, # NOT SIGN - 0x00ad: 0x00ad, # SOFT HYPHEN - 0x00ae: 0x00ae, # REGISTERED SIGN - 0x00af: 0x00af, # MACRON - 0x00b0: 0x00b0, # DEGREE SIGN - 0x00b1: 0x00b1, # PLUS-MINUS SIGN - 0x00b2: 0x00b2, # SUPERSCRIPT TWO - 0x00b3: 0x00b3, # SUPERSCRIPT THREE - 0x00b4: 0x00b4, # 
ACUTE ACCENT - 0x00b5: 0x00b5, # MICRO SIGN - 0x00b6: 0x00b6, # PILCROW SIGN - 0x00b7: 0x00b7, # MIDDLE DOT - 0x00b8: 0x00b8, # CEDILLA - 0x00b9: 0x00b9, # SUPERSCRIPT ONE - 0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00d7: 0x00aa, # MULTIPLICATION SIGN - 0x00f7: 0x00ba, # DIVISION SIGN - 0x05d0: 0x00e0, # HEBREW LETTER ALEF - 0x05d1: 0x00e1, # HEBREW LETTER BET - 0x05d2: 0x00e2, # HEBREW LETTER GIMEL - 0x05d3: 0x00e3, # HEBREW LETTER DALET - 0x05d4: 0x00e4, # HEBREW LETTER HE - 0x05d5: 0x00e5, # HEBREW LETTER VAV - 0x05d6: 0x00e6, # HEBREW LETTER ZAYIN - 0x05d7: 0x00e7, # HEBREW LETTER HET - 0x05d8: 0x00e8, # HEBREW LETTER TET - 0x05d9: 0x00e9, # HEBREW LETTER YOD - 0x05da: 0x00ea, # HEBREW LETTER FINAL KAF - 0x05db: 0x00eb, # HEBREW LETTER KAF - 0x05dc: 0x00ec, # HEBREW LETTER LAMED - 0x05dd: 0x00ed, # HEBREW LETTER FINAL MEM - 0x05de: 0x00ee, # HEBREW LETTER MEM - 0x05df: 0x00ef, # HEBREW LETTER FINAL NUN - 0x05e0: 0x00f0, # HEBREW LETTER NUN - 0x05e1: 0x00f1, # HEBREW LETTER SAMEKH - 0x05e2: 0x00f2, # HEBREW LETTER AYIN - 0x05e3: 0x00f3, # HEBREW LETTER FINAL PE - 0x05e4: 0x00f4, # HEBREW LETTER PE - 0x05e5: 0x00f5, # HEBREW LETTER FINAL TSADI - 0x05e6: 0x00f6, # HEBREW LETTER TSADI - 0x05e7: 0x00f7, # HEBREW LETTER QOF - 0x05e8: 0x00f8, # HEBREW LETTER RESH - 0x05e9: 0x00f9, # HEBREW LETTER SHIN - 0x05ea: 0x00fa, # HEBREW LETTER TAV - 0x200e: 0x00fd, # LEFT-TO-RIGHT MARK - 0x200f: 0x00fe, # RIGHT-TO-LEFT MARK - 0x2017: 0x00df, # DOUBLE LOW LINE + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000a: 0x0a, # LINE FEED + 0x000b: 0x0b, # VERTICAL 
TABULATION + 0x000c: 0x0c, # FORM FEED + 0x000d: 0x0d, # CARRIAGE RETURN + 0x000e: 0x0e, # SHIFT OUT + 0x000f: 0x0f, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001a: 0x1a, # SUBSTITUTE + 0x001b: 0x1b, # ESCAPE + 0x001c: 0x1c, # FILE SEPARATOR + 0x001d: 0x1d, # GROUP SEPARATOR + 0x001e: 0x1e, # RECORD SEPARATOR + 0x001f: 0x1f, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002a: 0x2a, # ASTERISK + 0x002b: 0x2b, # PLUS SIGN + 0x002c: 0x2c, # COMMA + 0x002d: 0x2d, # HYPHEN-MINUS + 0x002e: 0x2e, # FULL STOP + 0x002f: 0x2f, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003a: 0x3a, # COLON + 0x003b: 0x3b, # SEMICOLON + 0x003c: 0x3c, # LESS-THAN SIGN + 0x003d: 0x3d, # EQUALS SIGN + 0x003e: 0x3e, # GREATER-THAN SIGN + 0x003f: 0x3f, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 
0x49, # LATIN CAPITAL LETTER I + 0x004a: 0x4a, # LATIN CAPITAL LETTER J + 0x004b: 0x4b, # LATIN CAPITAL LETTER K + 0x004c: 0x4c, # LATIN CAPITAL LETTER L + 0x004d: 0x4d, # LATIN CAPITAL LETTER M + 0x004e: 0x4e, # LATIN CAPITAL LETTER N + 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005a: 0x5a, # LATIN CAPITAL LETTER Z + 0x005b: 0x5b, # LEFT SQUARE BRACKET + 0x005c: 0x5c, # REVERSE SOLIDUS + 0x005d: 0x5d, # RIGHT SQUARE BRACKET + 0x005e: 0x5e, # CIRCUMFLEX ACCENT + 0x005f: 0x5f, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006a: 0x6a, # LATIN SMALL LETTER J + 0x006b: 0x6b, # LATIN SMALL LETTER K + 0x006c: 0x6c, # LATIN SMALL LETTER L + 0x006d: 0x6d, # LATIN SMALL LETTER M + 0x006e: 0x6e, # LATIN SMALL LETTER N + 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007a: 0x7a, # LATIN SMALL LETTER Z + 0x007b: 0x7b, # LEFT CURLY BRACKET + 0x007c: 0x7c, 
# VERTICAL LINE + 0x007d: 0x7d, # RIGHT CURLY BRACKET + 0x007e: 0x7e, # TILDE + 0x007f: 0x7f, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008a: 0x8a, # + 0x008b: 0x8b, # + 0x008c: 0x8c, # + 0x008d: 0x8d, # + 0x008e: 0x8e, # + 0x008f: 0x8f, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009a: 0x9a, # + 0x009b: 0x9b, # + 0x009c: 0x9c, # + 0x009d: 0x9d, # + 0x009e: 0x9e, # + 0x009f: 0x9f, # + 0x00a0: 0xa0, # NO-BREAK SPACE + 0x00a2: 0xa2, # CENT SIGN + 0x00a3: 0xa3, # POUND SIGN + 0x00a4: 0xa4, # CURRENCY SIGN + 0x00a5: 0xa5, # YEN SIGN + 0x00a6: 0xa6, # BROKEN BAR + 0x00a7: 0xa7, # SECTION SIGN + 0x00a8: 0xa8, # DIAERESIS + 0x00a9: 0xa9, # COPYRIGHT SIGN + 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0xac, # NOT SIGN + 0x00ad: 0xad, # SOFT HYPHEN + 0x00ae: 0xae, # REGISTERED SIGN + 0x00af: 0xaf, # MACRON + 0x00b0: 0xb0, # DEGREE SIGN + 0x00b1: 0xb1, # PLUS-MINUS SIGN + 0x00b2: 0xb2, # SUPERSCRIPT TWO + 0x00b3: 0xb3, # SUPERSCRIPT THREE + 0x00b4: 0xb4, # ACUTE ACCENT + 0x00b5: 0xb5, # MICRO SIGN + 0x00b6: 0xb6, # PILCROW SIGN + 0x00b7: 0xb7, # MIDDLE DOT + 0x00b8: 0xb8, # CEDILLA + 0x00b9: 0xb9, # SUPERSCRIPT ONE + 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF + 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS + 0x00d7: 0xaa, # MULTIPLICATION SIGN + 0x00f7: 0xba, # DIVISION SIGN + 0x05d0: 0xe0, # HEBREW LETTER ALEF + 0x05d1: 0xe1, # HEBREW LETTER BET + 0x05d2: 0xe2, # HEBREW LETTER GIMEL + 0x05d3: 0xe3, # HEBREW LETTER DALET + 0x05d4: 0xe4, # HEBREW LETTER HE + 0x05d5: 0xe5, # HEBREW LETTER VAV + 0x05d6: 0xe6, # HEBREW LETTER ZAYIN + 0x05d7: 0xe7, # HEBREW LETTER 
HET + 0x05d8: 0xe8, # HEBREW LETTER TET + 0x05d9: 0xe9, # HEBREW LETTER YOD + 0x05da: 0xea, # HEBREW LETTER FINAL KAF + 0x05db: 0xeb, # HEBREW LETTER KAF + 0x05dc: 0xec, # HEBREW LETTER LAMED + 0x05dd: 0xed, # HEBREW LETTER FINAL MEM + 0x05de: 0xee, # HEBREW LETTER MEM + 0x05df: 0xef, # HEBREW LETTER FINAL NUN + 0x05e0: 0xf0, # HEBREW LETTER NUN + 0x05e1: 0xf1, # HEBREW LETTER SAMEKH + 0x05e2: 0xf2, # HEBREW LETTER AYIN + 0x05e3: 0xf3, # HEBREW LETTER FINAL PE + 0x05e4: 0xf4, # HEBREW LETTER PE + 0x05e5: 0xf5, # HEBREW LETTER FINAL TSADI + 0x05e6: 0xf6, # HEBREW LETTER TSADI + 0x05e7: 0xf7, # HEBREW LETTER QOF + 0x05e8: 0xf8, # HEBREW LETTER RESH + 0x05e9: 0xf9, # HEBREW LETTER SHIN + 0x05ea: 0xfa, # HEBREW LETTER TAV + 0x200e: 0xfd, # LEFT-TO-RIGHT MARK + 0x200f: 0xfe, # RIGHT-TO-LEFT MARK + 0x2017: 0xdf, # DOUBLE LOW LINE } \ No newline at end of file Index: iso8859_9.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_9.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- iso8859_9.py 21 Oct 2005 13:49:12 -0000 1.5 +++ iso8859_9.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'ISO8859/8859-9.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-9.TXT' with gencodec.py. """#" @@ -28,536 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1024 lines suppressed...] 
+ 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0xf7, # DIVISION SIGN + 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0xd0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0xf0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0xdd, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xfd, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0xde, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0xfe, # LATIN SMALL LETTER S WITH CEDILLA } \ No newline at end of file Index: koi8_r.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/koi8_r.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- koi8_r.py 21 Oct 2005 13:49:12 -0000 1.5 +++ koi8_r.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MISC/KOI8-R.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/MISC/KOI8-R.TXT' with gencodec.py. """#" @@ -28,658 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1146 lines suppressed...] 
+ 0x2564: 0xb6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0xb7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0xb8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0xb9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0xba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0xbb, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0xbc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0xbd, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0xbe, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x8b, # UPPER HALF BLOCK + 0x2584: 0x8c, # LOWER HALF BLOCK + 0x2588: 0x8d, # FULL BLOCK + 0x258c: 0x8e, # LEFT HALF BLOCK + 0x2590: 0x8f, # RIGHT HALF BLOCK + 0x2591: 0x90, # LIGHT SHADE + 0x2592: 0x91, # MEDIUM SHADE + 0x2593: 0x92, # DARK SHADE + 0x25a0: 0x94, # BLACK SQUARE } \ No newline at end of file Index: koi8_u.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/koi8_u.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- koi8_u.py 23 Feb 2004 09:00:43 -0000 1.2 +++ koi8_u.py 24 Oct 2005 12:07:48 -0000 1.3 @@ -1,15 +1,8 @@ -""" Python Character Mapping Codec for KOI8U. - - This character scheme is compliant to RFC2319 - -Written by Marc-Andre Lemburg (mal at lemburg.com). -Modified by Maxim Dzumanenko . - -(c) Copyright 2002, Python Software Foundation. +""" Python Character Mapping Codec generated from 'python-mappings/KOI8-U.TXT' with gencodec.py. 
"""#" -import codecs, koi8_r +import codecs ### Codec APIs @@ -21,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -35,20 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map -decoding_map = koi8_r.decoding_map.copy() -decoding_map.update({ - 0x00a4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00a6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00a7: 0x0457, # CYRILLIC SMALL LETTER YI (UKRAINIAN) - 0x00ad: 0x0491, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN - 0x00b4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00b6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b7: 0x0407, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - 0x00bd: 0x0490, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN -}) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0a -> LINE FEED + u'\x0b' # 0x0b -> VERTICAL TABULATION + u'\x0c' # 0x0c -> FORM FEED + u'\r' # 0x0d -> CARRIAGE RETURN + u'\x0e' # 0x0e -> SHIFT OUT + u'\x0f' # 0x0f -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1a -> SUBSTITUTE + u'\x1b' # 0x1b -> ESCAPE + u'\x1c' # 0x1c -> FILE SEPARATOR + 
u'\x1d' # 0x1d -> GROUP SEPARATOR + u'\x1e' # 0x1e -> RECORD SEPARATOR + u'\x1f' # 0x1f -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2a -> ASTERISK + u'+' # 0x2b -> PLUS SIGN + u',' # 0x2c -> COMMA + u'-' # 0x2d -> HYPHEN-MINUS + u'.' # 0x2e -> FULL STOP + u'/' # 0x2f -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3a -> COLON + u';' # 0x3b -> SEMICOLON + u'<' # 0x3c -> LESS-THAN SIGN + u'=' # 0x3d -> EQUALS SIGN + u'>' # 0x3e -> GREATER-THAN SIGN + u'?' # 0x3f -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4a -> LATIN CAPITAL LETTER J + u'K' # 0x4b -> LATIN CAPITAL LETTER K + u'L' # 0x4c -> LATIN CAPITAL LETTER L + u'M' # 0x4d -> LATIN CAPITAL LETTER M + u'N' # 0x4e -> LATIN CAPITAL LETTER N + u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN 
CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5a -> LATIN CAPITAL LETTER Z + u'[' # 0x5b -> LEFT SQUARE BRACKET + u'\\' # 0x5c -> REVERSE SOLIDUS + u']' # 0x5d -> RIGHT SQUARE BRACKET + u'^' # 0x5e -> CIRCUMFLEX ACCENT + u'_' # 0x5f -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6a -> LATIN SMALL LETTER J + u'k' # 0x6b -> LATIN SMALL LETTER K + u'l' # 0x6c -> LATIN SMALL LETTER L + u'm' # 0x6d -> LATIN SMALL LETTER M + u'n' # 0x6e -> LATIN SMALL LETTER N + u'o' # 0x6f -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7a -> LATIN SMALL LETTER Z + u'{' # 0x7b -> LEFT CURLY BRACKET + u'|' # 0x7c -> VERTICAL LINE + u'}' # 0x7d -> RIGHT CURLY BRACKET + u'~' # 0x7e -> TILDE + u'\x7f' # 0x7f -> DELETE + u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL + u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + 
u'\u253c' # 0x8a -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x8b -> UPPER HALF BLOCK + u'\u2584' # 0x8c -> LOWER HALF BLOCK + u'\u2588' # 0x8d -> FULL BLOCK + u'\u258c' # 0x8e -> LEFT HALF BLOCK + u'\u2590' # 0x8f -> RIGHT HALF BLOCK + u'\u2591' # 0x90 -> LIGHT SHADE + u'\u2592' # 0x91 -> MEDIUM SHADE + u'\u2593' # 0x92 -> DARK SHADE + u'\u2320' # 0x93 -> TOP HALF INTEGRAL + u'\u25a0' # 0x94 -> BLACK SQUARE + u'\u2219' # 0x95 -> BULLET OPERATOR + u'\u221a' # 0x96 -> SQUARE ROOT + u'\u2248' # 0x97 -> ALMOST EQUAL TO + u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO + u'\xa0' # 0x9a -> NO-BREAK SPACE + u'\u2321' # 0x9b -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x9c -> DEGREE SIGN + u'\xb2' # 0x9d -> SUPERSCRIPT TWO + u'\xb7' # 0x9e -> MIDDLE DOT + u'\xf7' # 0x9f -> DIVISION SIGN + u'\u2550' # 0xa0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0xa1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0xa2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0xa3 -> CYRILLIC SMALL LETTER IO + u'\u0454' # 0xa4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u2554' # 0xa5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u0456' # 0xa6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0xa7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) + u'\u2557' # 0xa8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0xa9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0xaa -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xab -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0xac -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u0491' # 0xad -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN + u'\u255d' # 0xae -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0xaf -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xb0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0xb1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0xb2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0xb3 
-> CYRILLIC CAPITAL LETTER IO + u'\u0404' # 0xb4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u2563' # 0xb5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u0406' # 0xb6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0xb7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) + u'\u2566' # 0xb8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0xb9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xba -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0xbb -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0xbc -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u0490' # 0xbd -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN + u'\u256c' # 0xbe -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa9' # 0xbf -> COPYRIGHT SIGN + u'\u044e' # 0xc0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0xc1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xc2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0xc3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0xc4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xc5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0xc6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0xc7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0xc8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0xc9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xca -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xcb -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xcc -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xcd -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xce -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xcf -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xd0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0xd1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0xd2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xd3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xd4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xd5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0xd6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0xd7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0xd8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0xd9 -> CYRILLIC 
SMALL LETTER YERU + u'\u0437' # 0xda -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0xdb -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0xdc -> CYRILLIC SMALL LETTER E + u'\u0449' # 0xdd -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0xde -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0xdf -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0xe0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0xe1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xe2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0xe3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0xe4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xe5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0xe6 -> CYRILLIC CAPITAL LETTER EF + u'\u0413' # 0xe7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0xe8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0xe9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xea -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xeb -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xec -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xed -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xee -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xef -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xf0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0xf1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0xf2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xf3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xf4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xf5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0xf6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0xf7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0xf8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0xf9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0xfa -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0xfb -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0xfc -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0xfd -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0xfe -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0xff -> CYRILLIC CAPITAL LETTER HARD SIGN +) ### Encoding Map -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = { + 
0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000a: 0x0a, # LINE FEED + 0x000b: 0x0b, # VERTICAL TABULATION + 0x000c: 0x0c, # FORM FEED + 0x000d: 0x0d, # CARRIAGE RETURN + 0x000e: 0x0e, # SHIFT OUT + 0x000f: 0x0f, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001a: 0x1a, # SUBSTITUTE + 0x001b: 0x1b, # ESCAPE + 0x001c: 0x1c, # FILE SEPARATOR + 0x001d: 0x1d, # GROUP SEPARATOR + 0x001e: 0x1e, # RECORD SEPARATOR + 0x001f: 0x1f, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002a: 0x2a, # ASTERISK + 0x002b: 0x2b, # PLUS SIGN + 0x002c: 0x2c, # COMMA + 0x002d: 0x2d, # HYPHEN-MINUS + 0x002e: 0x2e, # FULL STOP + 0x002f: 0x2f, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003a: 0x3a, # COLON + 0x003b: 0x3b, # SEMICOLON + 0x003c: 0x3c, # LESS-THAN SIGN + 0x003d: 0x3d, # EQUALS SIGN + 0x003e: 0x3e, # GREATER-THAN SIGN + 0x003f: 0x3f, # QUESTION MARK + 0x0040: 0x40, 
# COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004a: 0x4a, # LATIN CAPITAL LETTER J + 0x004b: 0x4b, # LATIN CAPITAL LETTER K + 0x004c: 0x4c, # LATIN CAPITAL LETTER L + 0x004d: 0x4d, # LATIN CAPITAL LETTER M + 0x004e: 0x4e, # LATIN CAPITAL LETTER N + 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005a: 0x5a, # LATIN CAPITAL LETTER Z + 0x005b: 0x5b, # LEFT SQUARE BRACKET + 0x005c: 0x5c, # REVERSE SOLIDUS + 0x005d: 0x5d, # RIGHT SQUARE BRACKET + 0x005e: 0x5e, # CIRCUMFLEX ACCENT + 0x005f: 0x5f, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006a: 0x6a, # LATIN SMALL LETTER J + 0x006b: 0x6b, # LATIN SMALL LETTER K + 0x006c: 0x6c, # LATIN SMALL LETTER L + 0x006d: 0x6d, # LATIN SMALL LETTER M + 0x006e: 0x6e, # LATIN SMALL LETTER N + 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 
0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007a: 0x7a, # LATIN SMALL LETTER Z + 0x007b: 0x7b, # LEFT CURLY BRACKET + 0x007c: 0x7c, # VERTICAL LINE + 0x007d: 0x7d, # RIGHT CURLY BRACKET + 0x007e: 0x7e, # TILDE + 0x007f: 0x7f, # DELETE + 0x00a0: 0x9a, # NO-BREAK SPACE + 0x00a9: 0xbf, # COPYRIGHT SIGN + 0x00b0: 0x9c, # DEGREE SIGN + 0x00b2: 0x9d, # SUPERSCRIPT TWO + 0x00b7: 0x9e, # MIDDLE DOT + 0x00f7: 0x9f, # DIVISION SIGN + 0x0401: 0xb3, # CYRILLIC CAPITAL LETTER IO + 0x0404: 0xb4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0406: 0xb6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xb7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) + 0x0410: 0xe1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xe2, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xf7, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xe7, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xe4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xe5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xf6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xfa, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xe9, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xea, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0xeb, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0xec, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0xed, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0xee, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0xef, # CYRILLIC CAPITAL LETTER O + 0x041f: 0xf0, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xf2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xf3, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xf4, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xf5, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xe6, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xe8, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xe3, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xfe, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xfb, # CYRILLIC CAPITAL LETTER 
SHA + 0x0429: 0xfd, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0xff, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0xf9, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0xf8, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0xfc, # CYRILLIC CAPITAL LETTER E + 0x042e: 0xe0, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0xf1, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xc1, # CYRILLIC SMALL LETTER A + 0x0431: 0xc2, # CYRILLIC SMALL LETTER BE + 0x0432: 0xd7, # CYRILLIC SMALL LETTER VE + 0x0433: 0xc7, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xc4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xc5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xd6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xda, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xc9, # CYRILLIC SMALL LETTER I + 0x0439: 0xca, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0xcb, # CYRILLIC SMALL LETTER KA + 0x043b: 0xcc, # CYRILLIC SMALL LETTER EL + 0x043c: 0xcd, # CYRILLIC SMALL LETTER EM + 0x043d: 0xce, # CYRILLIC SMALL LETTER EN + 0x043e: 0xcf, # CYRILLIC SMALL LETTER O + 0x043f: 0xd0, # CYRILLIC SMALL LETTER PE + 0x0440: 0xd2, # CYRILLIC SMALL LETTER ER + 0x0441: 0xd3, # CYRILLIC SMALL LETTER ES + 0x0442: 0xd4, # CYRILLIC SMALL LETTER TE + 0x0443: 0xd5, # CYRILLIC SMALL LETTER U + 0x0444: 0xc6, # CYRILLIC SMALL LETTER EF + 0x0445: 0xc8, # CYRILLIC SMALL LETTER HA + 0x0446: 0xc3, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xde, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xdb, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xdd, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0xdf, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0xd9, # CYRILLIC SMALL LETTER YERU + 0x044c: 0xd8, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0xdc, # CYRILLIC SMALL LETTER E + 0x044e: 0xc0, # CYRILLIC SMALL LETTER YU + 0x044f: 0xd1, # CYRILLIC SMALL LETTER YA + 0x0451: 0xa3, # CYRILLIC SMALL LETTER IO + 0x0454: 0xa4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0456: 0xa6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xa7, # CYRILLIC SMALL LETTER YI (UKRAINIAN) + 0x0490: 0xbd, # CYRILLIC CAPITAL LETTER 
UKRAINIAN GHE WITH UPTURN + 0x0491: 0xad, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN + 0x2219: 0x95, # BULLET OPERATOR + 0x221a: 0x96, # SQUARE ROOT + 0x2248: 0x97, # ALMOST EQUAL TO + 0x2264: 0x98, # LESS-THAN OR EQUAL TO + 0x2265: 0x99, # GREATER-THAN OR EQUAL TO + 0x2320: 0x93, # TOP HALF INTEGRAL + 0x2321: 0x9b, # BOTTOM HALF INTEGRAL + 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x8a, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0xa0, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xa1, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0xa2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2554: 0xa5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0xa8, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0xa9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0xaa, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0xab, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0xac, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255d: 0xae, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0xaf, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0xb0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0xb1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0xb2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2563: 0xb5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0xb8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0xb9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0xba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0xbb, # BOX DRAWINGS DOUBLE UP AND 
HORIZONTAL + 0x256a: 0xbc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256c: 0xbe, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x8b, # UPPER HALF BLOCK + 0x2584: 0x8c, # LOWER HALF BLOCK + 0x2588: 0x8d, # FULL BLOCK + 0x258c: 0x8e, # LEFT HALF BLOCK + 0x2590: 0x8f, # RIGHT HALF BLOCK + 0x2591: 0x90, # LIGHT SHADE + 0x2592: 0x91, # MEDIUM SHADE + 0x2593: 0x92, # DARK SHADE + 0x25a0: 0x94, # BLACK SQUARE +} \ No newline at end of file Index: mac_centeuro.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_centeuro.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- mac_centeuro.py 21 Oct 2005 13:58:31 -0000 1.1 +++ mac_centeuro.py 24 Oct 2005 12:07:48 -0000 1.2 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/CENTEURO.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/CENTEURO.TXT' with gencodec.py. """#" @@ -28,656 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1144 lines suppressed...] 
+ 0x201a: 0xe2, # SINGLE LOW-9 QUOTATION MARK + 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0xe3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xa0, # DAGGER + 0x2022: 0xa5, # BULLET + 0x2026: 0xc9, # HORIZONTAL ELLIPSIS + 0x2039: 0xdc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0xdd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2122: 0xaa, # TRADE MARK SIGN + 0x2202: 0xb6, # PARTIAL DIFFERENTIAL + 0x2206: 0xc6, # INCREMENT + 0x2211: 0xb7, # N-ARY SUMMATION + 0x221a: 0xc3, # SQUARE ROOT + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0xd7, # LOZENGE } \ No newline at end of file Index: mac_croatian.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_croatian.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- mac_croatian.py 21 Oct 2005 13:58:32 -0000 1.1 +++ mac_croatian.py 24 Oct 2005 12:07:48 -0000 1.2 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/CROATIAN.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/CROATIAN.TXT' with gencodec.py. """#" @@ -28,654 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1142 lines suppressed...] 
+ 0x2039: 0xdc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0xdd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xda, # FRACTION SLASH + 0x20ac: 0xdb, # EURO SIGN + 0x2122: 0xaa, # TRADE MARK SIGN + 0x2202: 0xb6, # PARTIAL DIFFERENTIAL + 0x2206: 0xb4, # INCREMENT + 0x220f: 0xb8, # N-ARY PRODUCT + 0x2211: 0xb7, # N-ARY SUMMATION + 0x221a: 0xc3, # SQUARE ROOT + 0x221e: 0xb0, # INFINITY + 0x222b: 0xba, # INTEGRAL + 0x2248: 0xc5, # ALMOST EQUAL TO + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0xd7, # LOZENGE + 0xf8ff: 0xd8, # Apple logo } \ No newline at end of file Index: mac_cyrillic.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_cyrillic.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- mac_cyrillic.py 21 Oct 2005 13:49:12 -0000 1.5 +++ mac_cyrillic.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. """#" @@ -28,654 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1142 lines suppressed...] 
+ 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0xd7, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xa0, # DAGGER + 0x2022: 0xa5, # BULLET + 0x2026: 0xc9, # HORIZONTAL ELLIPSIS + 0x20ac: 0xff, # EURO SIGN + 0x2116: 0xdc, # NUMERO SIGN + 0x2122: 0xaa, # TRADE MARK SIGN + 0x2206: 0xc6, # INCREMENT + 0x221a: 0xc3, # SQUARE ROOT + 0x221e: 0xb0, # INFINITY + 0x2248: 0xc5, # ALMOST EQUAL TO + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO } \ No newline at end of file Index: mac_farsi.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_farsi.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- mac_farsi.py 21 Oct 2005 13:58:32 -0000 1.1 +++ mac_farsi.py 24 Oct 2005 12:07:48 -0000 1.2 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/FARSI.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/FARSI.TXT' with gencodec.py. """#" @@ -28,658 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1146 lines suppressed...] 
+ 0x0698: 0xfe, # ARABIC LETTER JEH + 0x06a4: 0xf7, # ARABIC LETTER VEH + 0x06af: 0xf8, # ARABIC LETTER GAF + 0x06ba: 0x8b, # ARABIC LETTER NOON GHUNNA + 0x06d2: 0xff, # ARABIC LETTER YEH BARREE + 0x06d5: 0xf6, # ARABIC LETTER AE + 0x06f0: 0xb0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) + 0x06f1: 0xb1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) + 0x06f2: 0xb2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) + 0x06f3: 0xb3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) + 0x06f4: 0xb4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) + 0x06f5: 0xb5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) + 0x06f6: 0xb6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) + 0x06f7: 0xb7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) + 0x06f8: 0xb8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) + 0x06f9: 0xb9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) + 0x2026: 0x93, # HORIZONTAL ELLIPSIS, right-left + 0x274a: 0xc0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left } \ No newline at end of file Index: mac_greek.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_greek.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- mac_greek.py 21 Oct 2005 13:49:12 -0000 1.5 +++ mac_greek.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/GREEK.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/GREEK.TXT' with gencodec.py. """#" @@ -28,656 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1144 lines suppressed...] 
+ 0x03cd: 0xe0, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0xf1, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0xd0, # EN DASH + 0x2015: 0xd1, # HORIZONTAL BAR + 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK + 0x2020: 0xa0, # DAGGER + 0x2022: 0x96, # BULLET + 0x2026: 0xc9, # HORIZONTAL ELLIPSIS + 0x2030: 0x98, # PER MILLE SIGN + 0x20ac: 0x9c, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + 0x2122: 0x93, # TRADE MARK SIGN + 0x2248: 0xc5, # ALMOST EQUAL TO + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO } \ No newline at end of file Index: mac_iceland.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_iceland.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- mac_iceland.py 21 Oct 2005 13:49:12 -0000 1.5 +++ mac_iceland.py 24 Oct 2005 12:07:48 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ICELAND.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/ICELAND.TXT' with gencodec.py. """#" @@ -28,652 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1140 lines suppressed...] 
+ 0x2026: 0xc9, # HORIZONTAL ELLIPSIS + 0x2030: 0xe4, # PER MILLE SIGN + 0x2044: 0xda, # FRACTION SLASH + 0x20ac: 0xdb, # EURO SIGN + 0x2122: 0xaa, # TRADE MARK SIGN + 0x2202: 0xb6, # PARTIAL DIFFERENTIAL + 0x2206: 0xc6, # INCREMENT + 0x220f: 0xb8, # N-ARY PRODUCT + 0x2211: 0xb7, # N-ARY SUMMATION + 0x221a: 0xc3, # SQUARE ROOT + 0x221e: 0xb0, # INFINITY + 0x222b: 0xba, # INTEGRAL + 0x2248: 0xc5, # ALMOST EQUAL TO + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0xd7, # LOZENGE + 0xf8ff: 0xf0, # Apple logo } \ No newline at end of file Index: mac_roman.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_roman.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- mac_roman.py 21 Oct 2005 13:49:12 -0000 1.5 +++ mac_roman.py 24 Oct 2005 12:07:49 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ROMAN.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/ROMAN.TXT' with gencodec.py. """#" @@ -28,653 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1141 lines suppressed...] 
+ 0x2044: 0xda, # FRACTION SLASH + 0x20ac: 0xdb, # EURO SIGN + 0x2122: 0xaa, # TRADE MARK SIGN + 0x2202: 0xb6, # PARTIAL DIFFERENTIAL + 0x2206: 0xc6, # INCREMENT + 0x220f: 0xb8, # N-ARY PRODUCT + 0x2211: 0xb7, # N-ARY SUMMATION + 0x221a: 0xc3, # SQUARE ROOT + 0x221e: 0xb0, # INFINITY + 0x222b: 0xba, # INTEGRAL + 0x2248: 0xc5, # ALMOST EQUAL TO + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0xd7, # LOZENGE + 0xf8ff: 0xf0, # Apple logo + 0xfb01: 0xde, # LATIN SMALL LIGATURE FI + 0xfb02: 0xdf, # LATIN SMALL LIGATURE FL } \ No newline at end of file Index: mac_romanian.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_romanian.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- mac_romanian.py 21 Oct 2005 13:58:32 -0000 1.1 +++ mac_romanian.py 24 Oct 2005 12:07:49 -0000 1.2 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ROMANIAN.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT' with gencodec.py. """#" @@ -28,653 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1141 lines suppressed...] 
+ 0x2039: 0xdc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203a: 0xdd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xda, # FRACTION SLASH + 0x20ac: 0xdb, # EURO SIGN + 0x2122: 0xaa, # TRADE MARK SIGN + 0x2202: 0xb6, # PARTIAL DIFFERENTIAL + 0x2206: 0xc6, # INCREMENT + 0x220f: 0xb8, # N-ARY PRODUCT + 0x2211: 0xb7, # N-ARY SUMMATION + 0x221a: 0xc3, # SQUARE ROOT + 0x221e: 0xb0, # INFINITY + 0x222b: 0xba, # INTEGRAL + 0x2248: 0xc5, # ALMOST EQUAL TO + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0xd7, # LOZENGE + 0xf8ff: 0xf0, # Apple logo } \ No newline at end of file Index: mac_turkish.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_turkish.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- mac_turkish.py 21 Oct 2005 13:49:12 -0000 1.5 +++ mac_turkish.py 24 Oct 2005 12:07:49 -0000 1.6 @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/TURKISH.TXT' with gencodec.py. +""" Python Character Mapping Codec generated from 'MAPPINGS/VENDORS/APPLE/TURKISH.TXT' with gencodec.py. """#" @@ -28,653 +28,525 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map - [...1141 lines suppressed...] 
+ 0x2022: 0xa5, # BULLET + 0x2026: 0xc9, # HORIZONTAL ELLIPSIS + 0x2030: 0xe4, # PER MILLE SIGN + 0x2122: 0xaa, # TRADE MARK SIGN + 0x2202: 0xb6, # PARTIAL DIFFERENTIAL + 0x2206: 0xc6, # INCREMENT + 0x220f: 0xb8, # N-ARY PRODUCT + 0x2211: 0xb7, # N-ARY SUMMATION + 0x221a: 0xc3, # SQUARE ROOT + 0x221e: 0xb0, # INFINITY + 0x222b: 0xba, # INTEGRAL + 0x2248: 0xc5, # ALMOST EQUAL TO + 0x2260: 0xad, # NOT EQUAL TO + 0x2264: 0xb2, # LESS-THAN OR EQUAL TO + 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO + 0x25ca: 0xd7, # LOZENGE + 0xf8a0: 0xf5, # undefined1 + 0xf8ff: 0xf0, # Apple logo } \ No newline at end of file Index: tis_620.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/tis_620.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- tis_620.py 7 Aug 2004 06:03:09 -0000 1.2 +++ tis_620.py 24 Oct 2005 12:07:49 -0000 1.3 @@ -1,14 +1,8 @@ -""" Python Character Mapping Codec for TIS-620. - - According to - ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT the - TIS-620 is the identical to ISO_8859-11 with the 0xA0 (no-break - space) mapping removed. +""" Python Character Mapping Codec generated from 'python-mappings/TIS-620.TXT' with gencodec.py. 
"""#" import codecs -from encodings.iso8859_11 import decoding_map ### Codec APIs @@ -20,8 +14,8 @@ def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - + return codecs.charmap_decode(input,errors,decoding_table) + class StreamWriter(Codec,codecs.StreamWriter): pass @@ -34,13 +28,516 @@ return (Codec().encode,Codec().decode,StreamReader,StreamWriter) -### Decoding Map -decoding_map = decoding_map.copy() -decoding_map.update({ - 0x00a0: None, -}) +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0a -> LINE FEED + u'\x0b' # 0x0b -> VERTICAL TABULATION + u'\x0c' # 0x0c -> FORM FEED + u'\r' # 0x0d -> CARRIAGE RETURN + u'\x0e' # 0x0e -> SHIFT OUT + u'\x0f' # 0x0f -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1a -> SUBSTITUTE + u'\x1b' # 0x1b -> ESCAPE + u'\x1c' # 0x1c -> FILE SEPARATOR + u'\x1d' # 0x1d -> GROUP SEPARATOR + u'\x1e' # 0x1e -> RECORD SEPARATOR + u'\x1f' # 0x1f -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' 
# 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2a -> ASTERISK + u'+' # 0x2b -> PLUS SIGN + u',' # 0x2c -> COMMA + u'-' # 0x2d -> HYPHEN-MINUS + u'.' # 0x2e -> FULL STOP + u'/' # 0x2f -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3a -> COLON + u';' # 0x3b -> SEMICOLON + u'<' # 0x3c -> LESS-THAN SIGN + u'=' # 0x3d -> EQUALS SIGN + u'>' # 0x3e -> GREATER-THAN SIGN + u'?' # 0x3f -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4a -> LATIN CAPITAL LETTER J + u'K' # 0x4b -> LATIN CAPITAL LETTER K + u'L' # 0x4c -> LATIN CAPITAL LETTER L + u'M' # 0x4d -> LATIN CAPITAL LETTER M + u'N' # 0x4e -> LATIN CAPITAL LETTER N + u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5a -> LATIN CAPITAL LETTER Z + u'[' # 0x5b -> LEFT SQUARE BRACKET + 
u'\\' # 0x5c -> REVERSE SOLIDUS + u']' # 0x5d -> RIGHT SQUARE BRACKET + u'^' # 0x5e -> CIRCUMFLEX ACCENT + u'_' # 0x5f -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6a -> LATIN SMALL LETTER J + u'k' # 0x6b -> LATIN SMALL LETTER K + u'l' # 0x6c -> LATIN SMALL LETTER L + u'm' # 0x6d -> LATIN SMALL LETTER M + u'n' # 0x6e -> LATIN SMALL LETTER N + u'o' # 0x6f -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7a -> LATIN SMALL LETTER Z + u'{' # 0x7b -> LEFT CURLY BRACKET + u'|' # 0x7c -> VERTICAL LINE + u'}' # 0x7d -> RIGHT CURLY BRACKET + u'~' # 0x7e -> TILDE + u'\x7f' # 0x7f -> DELETE + u'\x80' # 0x80 -> + u'\x81' # 0x81 -> + u'\x82' # 0x82 -> + u'\x83' # 0x83 -> + u'\x84' # 0x84 -> + u'\x85' # 0x85 -> + u'\x86' # 0x86 -> + u'\x87' # 0x87 -> + u'\x88' # 0x88 -> + u'\x89' # 0x89 -> + u'\x8a' # 0x8a -> + u'\x8b' # 0x8b -> + u'\x8c' # 0x8c -> + u'\x8d' # 0x8d -> + u'\x8e' # 0x8e -> + u'\x8f' # 0x8f -> + u'\x90' # 0x90 -> + u'\x91' # 0x91 -> + u'\x92' # 0x92 -> + u'\x93' # 0x93 -> + u'\x94' # 0x94 -> + u'\x95' # 0x95 -> + u'\x96' # 0x96 -> + u'\x97' # 0x97 -> + u'\x98' # 0x98 -> + u'\x99' # 0x99 -> + u'\x9a' # 0x9a -> + u'\x9b' # 0x9b -> + u'\x9c' # 0x9c -> + u'\x9d' # 0x9d -> + u'\x9e' # 0x9e -> + u'\x9f' # 0x9f -> + u'\ufffe' + u'\u0e01' # 0xa1 -> THAI CHARACTER KO KAI 
+ u'\u0e02' # 0xa2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xa3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xa4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xa5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xa6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xa7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xa8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xa9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xaa -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xab -> THAI CHARACTER SO SO + u'\u0e0c' # 0xac -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xad -> THAI CHARACTER YO YING + u'\u0e0e' # 0xae -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xaf -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xb0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xb1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xb2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xb3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xb4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xb5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xb6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xb7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xb8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xb9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xba -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xbb -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xbc -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xbd -> THAI CHARACTER FO FA + u'\u0e1e' # 0xbe -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xbf -> THAI CHARACTER FO FAN + u'\u0e20' # 0xc0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xc1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xc2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xc3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xc4 -> THAI CHARACTER RU + u'\u0e25' # 0xc5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xc6 -> THAI CHARACTER LU + u'\u0e27' # 0xc7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xc8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xc9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xca -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xcb -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xcc -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xcd -> THAI CHARACTER O ANG + 
u'\u0e2e' # 0xce -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xcf -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xd0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xd1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xd2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xd3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xd4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xd5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xd6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xd7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xd8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xd9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xda -> THAI CHARACTER PHINTHU + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u0e3f' # 0xdf -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xe0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xe1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xe2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xe3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xe4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xe5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xe6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xe7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xe8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xe9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xea -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xeb -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xec -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xed -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xee -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xef -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xf0 -> THAI DIGIT ZERO + u'\u0e51' # 0xf1 -> THAI DIGIT ONE + u'\u0e52' # 0xf2 -> THAI DIGIT TWO + u'\u0e53' # 0xf3 -> THAI DIGIT THREE + u'\u0e54' # 0xf4 -> THAI DIGIT FOUR + u'\u0e55' # 0xf5 -> THAI DIGIT FIVE + u'\u0e56' # 0xf6 -> THAI DIGIT SIX + u'\u0e57' # 0xf7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xf8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xf9 -> THAI DIGIT NINE + u'\u0e5a' # 0xfa -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xfb -> THAI CHARACTER KHOMUT + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' +) ### Encoding Map -encoding_map = 
codecs.make_encoding_map(decoding_map) +encoding_map = { + 0x0000: 0x00, # NULL + 0x0001: 0x01, # START OF HEADING + 0x0002: 0x02, # START OF TEXT + 0x0003: 0x03, # END OF TEXT + 0x0004: 0x04, # END OF TRANSMISSION + 0x0005: 0x05, # ENQUIRY + 0x0006: 0x06, # ACKNOWLEDGE + 0x0007: 0x07, # BELL + 0x0008: 0x08, # BACKSPACE + 0x0009: 0x09, # HORIZONTAL TABULATION + 0x000a: 0x0a, # LINE FEED + 0x000b: 0x0b, # VERTICAL TABULATION + 0x000c: 0x0c, # FORM FEED + 0x000d: 0x0d, # CARRIAGE RETURN + 0x000e: 0x0e, # SHIFT OUT + 0x000f: 0x0f, # SHIFT IN + 0x0010: 0x10, # DATA LINK ESCAPE + 0x0011: 0x11, # DEVICE CONTROL ONE + 0x0012: 0x12, # DEVICE CONTROL TWO + 0x0013: 0x13, # DEVICE CONTROL THREE + 0x0014: 0x14, # DEVICE CONTROL FOUR + 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x16, # SYNCHRONOUS IDLE + 0x0017: 0x17, # END OF TRANSMISSION BLOCK + 0x0018: 0x18, # CANCEL + 0x0019: 0x19, # END OF MEDIUM + 0x001a: 0x1a, # SUBSTITUTE + 0x001b: 0x1b, # ESCAPE + 0x001c: 0x1c, # FILE SEPARATOR + 0x001d: 0x1d, # GROUP SEPARATOR + 0x001e: 0x1e, # RECORD SEPARATOR + 0x001f: 0x1f, # UNIT SEPARATOR + 0x0020: 0x20, # SPACE + 0x0021: 0x21, # EXCLAMATION MARK + 0x0022: 0x22, # QUOTATION MARK + 0x0023: 0x23, # NUMBER SIGN + 0x0024: 0x24, # DOLLAR SIGN + 0x0025: 0x25, # PERCENT SIGN + 0x0026: 0x26, # AMPERSAND + 0x0027: 0x27, # APOSTROPHE + 0x0028: 0x28, # LEFT PARENTHESIS + 0x0029: 0x29, # RIGHT PARENTHESIS + 0x002a: 0x2a, # ASTERISK + 0x002b: 0x2b, # PLUS SIGN + 0x002c: 0x2c, # COMMA + 0x002d: 0x2d, # HYPHEN-MINUS + 0x002e: 0x2e, # FULL STOP + 0x002f: 0x2f, # SOLIDUS + 0x0030: 0x30, # DIGIT ZERO + 0x0031: 0x31, # DIGIT ONE + 0x0032: 0x32, # DIGIT TWO + 0x0033: 0x33, # DIGIT THREE + 0x0034: 0x34, # DIGIT FOUR + 0x0035: 0x35, # DIGIT FIVE + 0x0036: 0x36, # DIGIT SIX + 0x0037: 0x37, # DIGIT SEVEN + 0x0038: 0x38, # DIGIT EIGHT + 0x0039: 0x39, # DIGIT NINE + 0x003a: 0x3a, # COLON + 0x003b: 0x3b, # SEMICOLON + 0x003c: 0x3c, # LESS-THAN SIGN + 0x003d: 0x3d, # EQUALS SIGN + 0x003e: 0x3e, # 
GREATER-THAN SIGN + 0x003f: 0x3f, # QUESTION MARK + 0x0040: 0x40, # COMMERCIAL AT + 0x0041: 0x41, # LATIN CAPITAL LETTER A + 0x0042: 0x42, # LATIN CAPITAL LETTER B + 0x0043: 0x43, # LATIN CAPITAL LETTER C + 0x0044: 0x44, # LATIN CAPITAL LETTER D + 0x0045: 0x45, # LATIN CAPITAL LETTER E + 0x0046: 0x46, # LATIN CAPITAL LETTER F + 0x0047: 0x47, # LATIN CAPITAL LETTER G + 0x0048: 0x48, # LATIN CAPITAL LETTER H + 0x0049: 0x49, # LATIN CAPITAL LETTER I + 0x004a: 0x4a, # LATIN CAPITAL LETTER J + 0x004b: 0x4b, # LATIN CAPITAL LETTER K + 0x004c: 0x4c, # LATIN CAPITAL LETTER L + 0x004d: 0x4d, # LATIN CAPITAL LETTER M + 0x004e: 0x4e, # LATIN CAPITAL LETTER N + 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x0050: 0x50, # LATIN CAPITAL LETTER P + 0x0051: 0x51, # LATIN CAPITAL LETTER Q + 0x0052: 0x52, # LATIN CAPITAL LETTER R + 0x0053: 0x53, # LATIN CAPITAL LETTER S + 0x0054: 0x54, # LATIN CAPITAL LETTER T + 0x0055: 0x55, # LATIN CAPITAL LETTER U + 0x0056: 0x56, # LATIN CAPITAL LETTER V + 0x0057: 0x57, # LATIN CAPITAL LETTER W + 0x0058: 0x58, # LATIN CAPITAL LETTER X + 0x0059: 0x59, # LATIN CAPITAL LETTER Y + 0x005a: 0x5a, # LATIN CAPITAL LETTER Z + 0x005b: 0x5b, # LEFT SQUARE BRACKET + 0x005c: 0x5c, # REVERSE SOLIDUS + 0x005d: 0x5d, # RIGHT SQUARE BRACKET + 0x005e: 0x5e, # CIRCUMFLEX ACCENT + 0x005f: 0x5f, # LOW LINE + 0x0060: 0x60, # GRAVE ACCENT + 0x0061: 0x61, # LATIN SMALL LETTER A + 0x0062: 0x62, # LATIN SMALL LETTER B + 0x0063: 0x63, # LATIN SMALL LETTER C + 0x0064: 0x64, # LATIN SMALL LETTER D + 0x0065: 0x65, # LATIN SMALL LETTER E + 0x0066: 0x66, # LATIN SMALL LETTER F + 0x0067: 0x67, # LATIN SMALL LETTER G + 0x0068: 0x68, # LATIN SMALL LETTER H + 0x0069: 0x69, # LATIN SMALL LETTER I + 0x006a: 0x6a, # LATIN SMALL LETTER J + 0x006b: 0x6b, # LATIN SMALL LETTER K + 0x006c: 0x6c, # LATIN SMALL LETTER L + 0x006d: 0x6d, # LATIN SMALL LETTER M + 0x006e: 0x6e, # LATIN SMALL LETTER N + 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x0070: 0x70, # LATIN SMALL LETTER P + 0x0071: 0x71, # 
LATIN SMALL LETTER Q + 0x0072: 0x72, # LATIN SMALL LETTER R + 0x0073: 0x73, # LATIN SMALL LETTER S + 0x0074: 0x74, # LATIN SMALL LETTER T + 0x0075: 0x75, # LATIN SMALL LETTER U + 0x0076: 0x76, # LATIN SMALL LETTER V + 0x0077: 0x77, # LATIN SMALL LETTER W + 0x0078: 0x78, # LATIN SMALL LETTER X + 0x0079: 0x79, # LATIN SMALL LETTER Y + 0x007a: 0x7a, # LATIN SMALL LETTER Z + 0x007b: 0x7b, # LEFT CURLY BRACKET + 0x007c: 0x7c, # VERTICAL LINE + 0x007d: 0x7d, # RIGHT CURLY BRACKET + 0x007e: 0x7e, # TILDE + 0x007f: 0x7f, # DELETE + 0x0080: 0x80, # + 0x0081: 0x81, # + 0x0082: 0x82, # + 0x0083: 0x83, # + 0x0084: 0x84, # + 0x0085: 0x85, # + 0x0086: 0x86, # + 0x0087: 0x87, # + 0x0088: 0x88, # + 0x0089: 0x89, # + 0x008a: 0x8a, # + 0x008b: 0x8b, # + 0x008c: 0x8c, # + 0x008d: 0x8d, # + 0x008e: 0x8e, # + 0x008f: 0x8f, # + 0x0090: 0x90, # + 0x0091: 0x91, # + 0x0092: 0x92, # + 0x0093: 0x93, # + 0x0094: 0x94, # + 0x0095: 0x95, # + 0x0096: 0x96, # + 0x0097: 0x97, # + 0x0098: 0x98, # + 0x0099: 0x99, # + 0x009a: 0x9a, # + 0x009b: 0x9b, # + 0x009c: 0x9c, # + 0x009d: 0x9d, # + 0x009e: 0x9e, # + 0x009f: 0x9f, # + 0x0e01: 0xa1, # THAI CHARACTER KO KAI + 0x0e02: 0xa2, # THAI CHARACTER KHO KHAI + 0x0e03: 0xa3, # THAI CHARACTER KHO KHUAT + 0x0e04: 0xa4, # THAI CHARACTER KHO KHWAI + 0x0e05: 0xa5, # THAI CHARACTER KHO KHON + 0x0e06: 0xa6, # THAI CHARACTER KHO RAKHANG + 0x0e07: 0xa7, # THAI CHARACTER NGO NGU + 0x0e08: 0xa8, # THAI CHARACTER CHO CHAN + 0x0e09: 0xa9, # THAI CHARACTER CHO CHING + 0x0e0a: 0xaa, # THAI CHARACTER CHO CHANG + 0x0e0b: 0xab, # THAI CHARACTER SO SO + 0x0e0c: 0xac, # THAI CHARACTER CHO CHOE + 0x0e0d: 0xad, # THAI CHARACTER YO YING + 0x0e0e: 0xae, # THAI CHARACTER DO CHADA + 0x0e0f: 0xaf, # THAI CHARACTER TO PATAK + 0x0e10: 0xb0, # THAI CHARACTER THO THAN + 0x0e11: 0xb1, # THAI CHARACTER THO NANGMONTHO + 0x0e12: 0xb2, # THAI CHARACTER THO PHUTHAO + 0x0e13: 0xb3, # THAI CHARACTER NO NEN + 0x0e14: 0xb4, # THAI CHARACTER DO DEK + 0x0e15: 0xb5, # THAI CHARACTER TO TAO + 0x0e16: 
0xb6, # THAI CHARACTER THO THUNG + 0x0e17: 0xb7, # THAI CHARACTER THO THAHAN + 0x0e18: 0xb8, # THAI CHARACTER THO THONG + 0x0e19: 0xb9, # THAI CHARACTER NO NU + 0x0e1a: 0xba, # THAI CHARACTER BO BAIMAI + 0x0e1b: 0xbb, # THAI CHARACTER PO PLA + 0x0e1c: 0xbc, # THAI CHARACTER PHO PHUNG + 0x0e1d: 0xbd, # THAI CHARACTER FO FA + 0x0e1e: 0xbe, # THAI CHARACTER PHO PHAN + 0x0e1f: 0xbf, # THAI CHARACTER FO FAN + 0x0e20: 0xc0, # THAI CHARACTER PHO SAMPHAO + 0x0e21: 0xc1, # THAI CHARACTER MO MA + 0x0e22: 0xc2, # THAI CHARACTER YO YAK + 0x0e23: 0xc3, # THAI CHARACTER RO RUA + 0x0e24: 0xc4, # THAI CHARACTER RU + 0x0e25: 0xc5, # THAI CHARACTER LO LING + 0x0e26: 0xc6, # THAI CHARACTER LU + 0x0e27: 0xc7, # THAI CHARACTER WO WAEN + 0x0e28: 0xc8, # THAI CHARACTER SO SALA + 0x0e29: 0xc9, # THAI CHARACTER SO RUSI + 0x0e2a: 0xca, # THAI CHARACTER SO SUA + 0x0e2b: 0xcb, # THAI CHARACTER HO HIP + 0x0e2c: 0xcc, # THAI CHARACTER LO CHULA + 0x0e2d: 0xcd, # THAI CHARACTER O ANG + 0x0e2e: 0xce, # THAI CHARACTER HO NOKHUK + 0x0e2f: 0xcf, # THAI CHARACTER PAIYANNOI + 0x0e30: 0xd0, # THAI CHARACTER SARA A + 0x0e31: 0xd1, # THAI CHARACTER MAI HAN-AKAT + 0x0e32: 0xd2, # THAI CHARACTER SARA AA + 0x0e33: 0xd3, # THAI CHARACTER SARA AM + 0x0e34: 0xd4, # THAI CHARACTER SARA I + 0x0e35: 0xd5, # THAI CHARACTER SARA II + 0x0e36: 0xd6, # THAI CHARACTER SARA UE + 0x0e37: 0xd7, # THAI CHARACTER SARA UEE + 0x0e38: 0xd8, # THAI CHARACTER SARA U + 0x0e39: 0xd9, # THAI CHARACTER SARA UU + 0x0e3a: 0xda, # THAI CHARACTER PHINTHU + 0x0e3f: 0xdf, # THAI CURRENCY SYMBOL BAHT + 0x0e40: 0xe0, # THAI CHARACTER SARA E + 0x0e41: 0xe1, # THAI CHARACTER SARA AE + 0x0e42: 0xe2, # THAI CHARACTER SARA O + 0x0e43: 0xe3, # THAI CHARACTER SARA AI MAIMUAN + 0x0e44: 0xe4, # THAI CHARACTER SARA AI MAIMALAI + 0x0e45: 0xe5, # THAI CHARACTER LAKKHANGYAO + 0x0e46: 0xe6, # THAI CHARACTER MAIYAMOK + 0x0e47: 0xe7, # THAI CHARACTER MAITAIKHU + 0x0e48: 0xe8, # THAI CHARACTER MAI EK + 0x0e49: 0xe9, # THAI CHARACTER MAI THO + 0x0e4a: 0xea, # 
THAI CHARACTER MAI TRI + 0x0e4b: 0xeb, # THAI CHARACTER MAI CHATTAWA + 0x0e4c: 0xec, # THAI CHARACTER THANTHAKHAT + 0x0e4d: 0xed, # THAI CHARACTER NIKHAHIT + 0x0e4e: 0xee, # THAI CHARACTER YAMAKKAN + 0x0e4f: 0xef, # THAI CHARACTER FONGMAN + 0x0e50: 0xf0, # THAI DIGIT ZERO + 0x0e51: 0xf1, # THAI DIGIT ONE + 0x0e52: 0xf2, # THAI DIGIT TWO + 0x0e53: 0xf3, # THAI DIGIT THREE + 0x0e54: 0xf4, # THAI DIGIT FOUR + 0x0e55: 0xf5, # THAI DIGIT FIVE + 0x0e56: 0xf6, # THAI DIGIT SIX + 0x0e57: 0xf7, # THAI DIGIT SEVEN + 0x0e58: 0xf8, # THAI DIGIT EIGHT + 0x0e59: 0xf9, # THAI DIGIT NINE + 0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU + 0x0e5b: 0xfb, # THAI CHARACTER KHOMUT +} \ No newline at end of file From lemburg at users.sourceforge.net Mon Oct 24 14:15:30 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Mon, 24 Oct 2005 14:15:30 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Lib/encodings cp037.py, 1.6, 1.7 cp1006.py, 1.6, 1.7 cp1026.py, 1.6, 1.7 cp1140.py, 1.3, 1.4 cp1250.py, 1.6, 1.7 cp1251.py, 1.6, 1.7 cp1252.py, 1.6, 1.7 cp1253.py, 1.6, 1.7 cp1254.py, 1.6, 1.7 cp1255.py, 1.6, 1.7 cp1256.py, 1.6, 1.7 cp1257.py, 1.6, 1.7 cp1258.py, 1.6, 1.7 cp424.py, 1.6, 1.7 cp500.py, 1.6, 1.7 cp856.py, 1.7, 1.8 cp874.py, 1.6, 1.7 cp875.py, 1.6, 1.7 iso8859_1.py, 1.6, 1.7 iso8859_10.py, 1.6, 1.7 iso8859_11.py, 1.4, 1.5 iso8859_13.py, 1.6, 1.7 iso8859_14.py, 1.6, 1.7 iso8859_15.py, 1.6, 1.7 iso8859_16.py, 1.4, 1.5 iso8859_2.py, 1.6, 1.7 iso8859_3.py, 1.6, 1.7 iso8859_4.py, 1.6, 1.7 iso8859_5.py, 1.6, 1.7 iso8859_6.py, 1.6, 1.7 iso8859_7.py, 1.6, 1.7 iso8859_8.py, 1.6, 1.7 iso8859_9.py, 1.6, 1.7 koi8_r.py, 1.6, 1.7 koi8_u.py, 1.3, 1.4 mac_centeuro.py, 1.2, 1.3 mac_croatian.py, 1.2, 1.3 mac_cyrillic.py, 1.6, 1.7 mac_farsi.py, 1.2, 1.3 mac_greek.py, 1.6, 1.7 mac_iceland.py, 1.6, 1.7 mac_roman.py, 1.6, 1.7 mac_romanian.py, 1.2, 1.3 mac_turkish.py, 1.6, 1.7 tis_620.py, 1.3, 1.4 Message-ID: <20051024121530.4FEB21E401B@bag.python.org> Update of 
/cvsroot/python/python/dist/src/Lib/encodings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1186 Modified Files: cp037.py cp1006.py cp1026.py cp1140.py cp1250.py cp1251.py cp1252.py cp1253.py cp1254.py cp1255.py cp1256.py cp1257.py cp1258.py cp424.py cp500.py cp856.py cp874.py cp875.py iso8859_1.py iso8859_10.py iso8859_11.py iso8859_13.py iso8859_14.py iso8859_15.py iso8859_16.py iso8859_2.py iso8859_3.py iso8859_4.py iso8859_5.py iso8859_6.py iso8859_7.py iso8859_8.py iso8859_9.py koi8_r.py koi8_u.py mac_centeuro.py mac_croatian.py mac_cyrillic.py mac_farsi.py mac_greek.py mac_iceland.py mac_roman.py mac_romanian.py mac_turkish.py tis_620.py Log Message: Cosmetic change: make all hex literals use upper case hex so that they look more like the Unicode Consortium files. Add ending new-line to all source files. Index: cp037.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp037.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp037.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp037.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x7f' # 0x07 -> DELETE u'\x97' # 0x08 -> CONTROL u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0a -> CONTROL - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x87' # 0x17 -> CONTROL u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1a -> CONTROL - u'\x8f' # 0x1b -> CONTROL - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - 
u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> CONTROL u'\x81' # 0x21 -> CONTROL u'\x82' # 0x22 -> CONTROL @@ -74,12 +74,12 @@ u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> CONTROL u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2a -> CONTROL - u'\x8b' # 0x2b -> CONTROL - u'\x8c' # 0x2c -> CONTROL - u'\x05' # 0x2d -> ENQUIRY - u'\x06' # 0x2e -> ACKNOWLEDGE - u'\x07' # 0x2f -> BELL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> CONTROL u'\x91' # 0x31 -> CONTROL u'\x16' # 0x32 -> SYNCHRONOUS IDLE @@ -90,12 +90,12 @@ u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> CONTROL u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3a -> CONTROL - u'\x9b' # 0x3b -> CONTROL - u'\x14' # 0x3c -> DEVICE CONTROL FOUR - u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3e -> CONTROL - u'\x1a' # 0x3f -> SUBSTITUTE + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\xa0' # 0x41 -> NO-BREAK SPACE u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX @@ -106,12 +106,12 @@ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4a -> CENT SIGN - u'.' # 0x4b -> FULL STOP - u'<' # 0x4c -> LESS-THAN SIGN - u'(' # 0x4d -> LEFT PARENTHESIS - u'+' # 0x4e -> PLUS SIGN - u'|' # 0x4f -> VERTICAL LINE + u'\xa2' # 0x4A -> CENT SIGN + u'.' 
# 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE u'&' # 0x50 -> AMPERSAND u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX @@ -122,12 +122,12 @@ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' # 0x5a -> EXCLAMATION MARK - u'$' # 0x5b -> DOLLAR SIGN - u'*' # 0x5c -> ASTERISK - u')' # 0x5d -> RIGHT PARENTHESIS - u';' # 0x5e -> SEMICOLON - u'\xac' # 0x5f -> NOT SIGN + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX @@ -138,12 +138,12 @@ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6a -> BROKEN BAR - u',' # 0x6b -> COMMA - u'%' # 0x6c -> PERCENT SIGN - u'_' # 0x6d -> LOW LINE - u'>' # 0x6e -> GREATER-THAN SIGN - u'?' # 0x6f -> QUESTION MARK + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX @@ -154,12 +154,12 @@ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7a -> COLON - u'#' # 0x7b -> NUMBER SIGN - u'@' # 0x7c -> COMMERCIAL AT - u"'" # 0x7d -> APOSTROPHE - u'=' # 0x7e -> EQUALS SIGN - u'"' # 0x7f -> QUOTATION MARK + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B @@ -170,12 +170,12 @@ u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8c -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8d -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8e -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8f -> PLUS-MINUS SIGN + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K @@ -186,108 +186,108 @@ u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9a -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9b -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9c -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9d -> CEDILLA - 
u'\xc6' # 0x9e -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9f -> CURRENCY SIGN - u'\xb5' # 0xa0 -> MICRO SIGN - u'~' # 0xa1 -> TILDE - u's' # 0xa2 -> LATIN SMALL LETTER S - u't' # 0xa3 -> LATIN SMALL LETTER T - u'u' # 0xa4 -> LATIN SMALL LETTER U - u'v' # 0xa5 -> LATIN SMALL LETTER V - u'w' # 0xa6 -> LATIN SMALL LETTER W - u'x' # 0xa7 -> LATIN SMALL LETTER X - u'y' # 0xa8 -> LATIN SMALL LETTER Y - u'z' # 0xa9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xaa -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xab -> INVERTED QUESTION MARK - u'\xd0' # 0xac -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xad -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xae -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xaf -> REGISTERED SIGN - u'^' # 0xb0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xb1 -> POUND SIGN - u'\xa5' # 0xb2 -> YEN SIGN - u'\xb7' # 0xb3 -> MIDDLE DOT - u'\xa9' # 0xb4 -> COPYRIGHT SIGN - u'\xa7' # 0xb5 -> SECTION SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xba -> LEFT SQUARE BRACKET - u']' # 0xbb -> RIGHT SQUARE BRACKET - u'\xaf' # 0xbc -> MACRON - u'\xa8' # 0xbd -> DIAERESIS - u'\xb4' # 0xbe -> ACUTE ACCENT - u'\xd7' # 0xbf -> MULTIPLICATION SIGN - u'{' # 0xc0 -> LEFT CURLY BRACKET - u'A' # 0xc1 -> LATIN CAPITAL LETTER A - u'B' # 0xc2 -> LATIN CAPITAL LETTER B - u'C' # 0xc3 -> LATIN CAPITAL LETTER C - u'D' # 0xc4 -> LATIN CAPITAL LETTER D - u'E' # 0xc5 -> LATIN CAPITAL LETTER E - u'F' # 0xc6 -> LATIN CAPITAL LETTER F - u'G' # 0xc7 -> LATIN CAPITAL LETTER G - u'H' # 0xc8 -> LATIN CAPITAL LETTER H - u'I' # 0xc9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xca -> SOFT HYPHEN - u'\xf4' # 0xcb -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xcc -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xcd -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xce -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xcf -> LATIN SMALL LETTER O WITH 
TILDE - u'}' # 0xd0 -> RIGHT CURLY BRACKET - u'J' # 0xd1 -> LATIN CAPITAL LETTER J - u'K' # 0xd2 -> LATIN CAPITAL LETTER K - u'L' # 0xd3 -> LATIN CAPITAL LETTER L - u'M' # 0xd4 -> LATIN CAPITAL LETTER M - u'N' # 0xd5 -> LATIN CAPITAL LETTER N - u'O' # 0xd6 -> LATIN CAPITAL LETTER O - u'P' # 0xd7 -> LATIN CAPITAL LETTER P - u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q - u'R' # 0xd9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xda -> SUPERSCRIPT ONE - u'\xfb' # 0xdb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xdc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xdd -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xde -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xdf -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xe0 -> REVERSE SOLIDUS - u'\xf7' # 0xe1 -> DIVISION SIGN - u'S' # 0xe2 -> LATIN CAPITAL LETTER S - u'T' # 0xe3 -> LATIN CAPITAL LETTER T - u'U' # 0xe4 -> LATIN CAPITAL LETTER U - u'V' # 0xe5 -> LATIN CAPITAL LETTER V - u'W' # 0xe6 -> LATIN CAPITAL LETTER W - u'X' # 0xe7 -> LATIN CAPITAL LETTER X - u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xea -> SUPERSCRIPT TWO - u'\xd4' # 0xeb -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xec -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xed -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xef -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xf0 -> DIGIT ZERO - u'1' # 0xf1 -> DIGIT ONE - u'2' # 0xf2 -> DIGIT TWO - u'3' # 0xf3 -> DIGIT THREE - u'4' # 0xf4 -> DIGIT FOUR - u'5' # 0xf5 -> DIGIT FIVE - u'6' # 0xf6 -> DIGIT SIX - u'7' # 0xf7 -> DIGIT SEVEN - u'8' # 0xf8 -> DIGIT EIGHT - u'9' # 0xf9 -> DIGIT NINE - u'\xb3' # 0xfa -> SUPERSCRIPT THREE - u'\xdb' # 0xfb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xfc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xfd -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xfe -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xff -> CONTROL + u'\xaa' # 
0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH 
DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH 
DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map @@ -298,97 +298,97 @@ 0x0002: 0x02, # START OF TEXT 0x0003: 0x03, # END OF TEXT 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2d, # ENQUIRY - 0x0006: 0x2e, # ACKNOWLEDGE - 0x0007: 0x2f, # BELL + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL 0x0008: 0x16, # BACKSPACE 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000a: 0x25, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3c, # DEVICE CONTROL FOUR - 0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE 0x0016: 0x32, # SYNCHRONOUS IDLE 0x0017: 0x26, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x3f, # SUBSTITUTE - 0x001b: 0x27, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x40, # SPACE - 0x0021: 0x5a, # EXCLAMATION MARK - 0x0022: 0x7f, # QUOTATION MARK - 0x0023: 0x7b, # NUMBER SIGN - 0x0024: 0x5b, # DOLLAR SIGN - 0x0025: 0x6c, # PERCENT SIGN + 0x0021: 0x5A, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # 
PERCENT SIGN 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7d, # APOSTROPHE - 0x0028: 0x4d, # LEFT PARENTHESIS - 0x0029: 0x5d, # RIGHT PARENTHESIS - 0x002a: 0x5c, # ASTERISK - 0x002b: 0x4e, # PLUS SIGN - 0x002c: 0x6b, # COMMA - 0x002d: 0x60, # HYPHEN-MINUS - 0x002e: 0x4b, # FULL STOP - 0x002f: 0x61, # SOLIDUS - 0x0030: 0xf0, # DIGIT ZERO - 0x0031: 0xf1, # DIGIT ONE - 0x0032: 0xf2, # DIGIT TWO - 0x0033: 0xf3, # DIGIT THREE - 0x0034: 0xf4, # DIGIT FOUR - 0x0035: 0xf5, # DIGIT FIVE - 0x0036: 0xf6, # DIGIT SIX - 0x0037: 0xf7, # DIGIT SEVEN - 0x0038: 0xf8, # DIGIT EIGHT - 0x0039: 0xf9, # DIGIT NINE - 0x003a: 0x7a, # COLON - 0x003b: 0x5e, # SEMICOLON - 0x003c: 0x4c, # LESS-THAN SIGN - 0x003d: 0x7e, # EQUALS SIGN - 0x003e: 0x6e, # GREATER-THAN SIGN - 0x003f: 0x6f, # QUESTION MARK - 0x0040: 0x7c, # COMMERCIAL AT - 0x0041: 0xc1, # LATIN CAPITAL LETTER A - 0x0042: 0xc2, # LATIN CAPITAL LETTER B - 0x0043: 0xc3, # LATIN CAPITAL LETTER C - 0x0044: 0xc4, # LATIN CAPITAL LETTER D - 0x0045: 0xc5, # LATIN CAPITAL LETTER E - 0x0046: 0xc6, # LATIN CAPITAL LETTER F - 0x0047: 0xc7, # LATIN CAPITAL LETTER G - 0x0048: 0xc8, # LATIN CAPITAL LETTER H - 0x0049: 0xc9, # LATIN CAPITAL LETTER I - 0x004a: 0xd1, # LATIN CAPITAL LETTER J - 0x004b: 0xd2, # LATIN CAPITAL LETTER K - 0x004c: 0xd3, # LATIN CAPITAL LETTER L - 0x004d: 0xd4, # LATIN CAPITAL LETTER M - 0x004e: 0xd5, # LATIN CAPITAL LETTER N - 0x004f: 0xd6, # LATIN CAPITAL LETTER O - 0x0050: 0xd7, # LATIN CAPITAL LETTER P - 0x0051: 0xd8, # LATIN CAPITAL LETTER Q - 0x0052: 0xd9, # LATIN CAPITAL LETTER R - 0x0053: 0xe2, # LATIN CAPITAL LETTER S - 0x0054: 0xe3, # LATIN CAPITAL LETTER T - 0x0055: 0xe4, # LATIN CAPITAL LETTER U - 0x0056: 0xe5, # LATIN CAPITAL LETTER V - 0x0057: 0xe6, # LATIN CAPITAL LETTER W - 0x0058: 0xe7, # LATIN CAPITAL LETTER X - 0x0059: 0xe8, # LATIN CAPITAL LETTER Y - 0x005a: 0xe9, # LATIN CAPITAL LETTER Z - 0x005b: 0xba, # LEFT SQUARE BRACKET - 0x005c: 0xe0, # REVERSE SOLIDUS - 0x005d: 0xbb, # RIGHT SQUARE BRACKET - 0x005e: 
0xb0, # CIRCUMFLEX ACCENT - 0x005f: 0x6d, # LOW LINE + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0xBA, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0xBB, # RIGHT SQUARE BRACKET 
+ 0x005E: 0xB0, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE 0x0060: 0x79, # GRAVE ACCENT 0x0061: 0x81, # LATIN SMALL LETTER A 0x0062: 0x82, # LATIN SMALL LETTER B @@ -399,28 +399,28 @@ 0x0067: 0x87, # LATIN SMALL LETTER G 0x0068: 0x88, # LATIN SMALL LETTER H 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006a: 0x91, # LATIN SMALL LETTER J - 0x006b: 0x92, # LATIN SMALL LETTER K - 0x006c: 0x93, # LATIN SMALL LETTER L - 0x006d: 0x94, # LATIN SMALL LETTER M - 0x006e: 0x95, # LATIN SMALL LETTER N - 0x006f: 0x96, # LATIN SMALL LETTER O + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O 0x0070: 0x97, # LATIN SMALL LETTER P 0x0071: 0x98, # LATIN SMALL LETTER Q 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xa2, # LATIN SMALL LETTER S - 0x0074: 0xa3, # LATIN SMALL LETTER T - 0x0075: 0xa4, # LATIN SMALL LETTER U - 0x0076: 0xa5, # LATIN SMALL LETTER V - 0x0077: 0xa6, # LATIN SMALL LETTER W - 0x0078: 0xa7, # LATIN SMALL LETTER X - 0x0079: 0xa8, # LATIN SMALL LETTER Y - 0x007a: 0xa9, # LATIN SMALL LETTER Z - 0x007b: 0xc0, # LEFT CURLY BRACKET - 0x007c: 0x4f, # VERTICAL LINE - 0x007d: 0xd0, # RIGHT CURLY BRACKET - 0x007e: 0xa1, # TILDE - 0x007f: 0x07, # DELETE + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x4F, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE 0x0080: 0x20, # CONTROL 0x0081: 0x21, # CONTROL 0x0082: 0x22, # CONTROL @@ -431,15 +431,15 @@ 0x0087: 0x17, # CONTROL 0x0088: 0x28, # CONTROL 0x0089: 0x29, # CONTROL - 0x008a: 0x2a, # 
CONTROL - 0x008b: 0x2b, # CONTROL - 0x008c: 0x2c, # CONTROL - 0x008d: 0x09, # CONTROL - 0x008e: 0x0a, # CONTROL - 0x008f: 0x1b, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL 0x0090: 0x30, # CONTROL 0x0091: 0x31, # CONTROL - 0x0092: 0x1a, # CONTROL + 0x0092: 0x1A, # CONTROL 0x0093: 0x33, # CONTROL 0x0094: 0x34, # CONTROL 0x0095: 0x35, # CONTROL @@ -447,106 +447,107 @@ 0x0097: 0x08, # CONTROL 0x0098: 0x38, # CONTROL 0x0099: 0x39, # CONTROL - 0x009a: 0x3a, # CONTROL - 0x009b: 0x3b, # CONTROL - 0x009c: 0x04, # CONTROL - 0x009d: 0x14, # CONTROL - 0x009e: 0x3e, # CONTROL - 0x009f: 0xff, # CONTROL - 0x00a0: 0x41, # NO-BREAK SPACE - 0x00a1: 0xaa, # INVERTED EXCLAMATION MARK - 0x00a2: 0x4a, # CENT SIGN - 0x00a3: 0xb1, # POUND SIGN - 0x00a4: 0x9f, # CURRENCY SIGN - 0x00a5: 0xb2, # YEN SIGN - 0x00a6: 0x6a, # BROKEN BAR - 0x00a7: 0xb5, # SECTION SIGN - 0x00a8: 0xbd, # DIAERESIS - 0x00a9: 0xb4, # COPYRIGHT SIGN - 0x00aa: 0x9a, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x5f, # NOT SIGN - 0x00ad: 0xca, # SOFT HYPHEN - 0x00ae: 0xaf, # REGISTERED SIGN - 0x00af: 0xbc, # MACRON - 0x00b0: 0x90, # DEGREE SIGN - 0x00b1: 0x8f, # PLUS-MINUS SIGN - 0x00b2: 0xea, # SUPERSCRIPT TWO - 0x00b3: 0xfa, # SUPERSCRIPT THREE - 0x00b4: 0xbe, # ACUTE ACCENT - 0x00b5: 0xa0, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb3, # MIDDLE DOT - 0x00b8: 0x9d, # CEDILLA - 0x00b9: 0xda, # SUPERSCRIPT ONE - 0x00ba: 0x9b, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xb8, # VULGAR FRACTION ONE HALF - 0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xab, # INVERTED QUESTION MARK - 0x00c0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x62, # LATIN CAPITAL LETTER A 
WITH CIRCUMFLEX - 0x00c3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x9e, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xac, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00d1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xed, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xeb, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xef, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xec, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xbf, # MULTIPLICATION SIGN - 0x00d8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xfd, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xfe, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xfb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xfc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xad, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xae, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00df: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x9c, # LATIN SMALL LIGATURE AE - 0x00e7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x54, # LATIN SMALL 
LETTER E WITH GRAVE - 0x00e9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xe1, # DIVISION SIGN - 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS -} \ No newline at end of file + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0x4A, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0x5F, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 
0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U 
WITH CIRCUMFLEX + 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS +} + Index: cp1006.py =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Lib/encodings/cp1006.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1006.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1006.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u06f0' # 0xa1 -> EXTENDED ARABIC-INDIC DIGIT ZERO - u'\u06f1' 
# 0xa2 -> EXTENDED ARABIC-INDIC DIGIT ONE - u'\u06f2' # 0xa3 -> EXTENDED ARABIC-INDIC DIGIT TWO - u'\u06f3' # 0xa4 -> EXTENDED ARABIC-INDIC DIGIT THREE - u'\u06f4' # 0xa5 -> EXTENDED ARABIC-INDIC DIGIT FOUR - u'\u06f5' # 0xa6 -> EXTENDED ARABIC-INDIC DIGIT FIVE - u'\u06f6' # 0xa7 -> EXTENDED ARABIC-INDIC DIGIT SIX - u'\u06f7' # 0xa8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN - u'\u06f8' # 0xa9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT - u'\u06f9' # 0xaa -> EXTENDED ARABIC-INDIC DIGIT NINE - u'\u060c' # 0xab -> ARABIC COMMA - u'\u061b' # 0xac -> ARABIC SEMICOLON - u'\xad' # 0xad -> SOFT HYPHEN - u'\u061f' # 0xae -> ARABIC QUESTION MARK - u'\ufe81' # 0xaf -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufe8d' # 0xb0 -> ARABIC LETTER ALEF ISOLATED FORM - u'\ufe8e' # 0xb1 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8e' # 0xb2 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8f' # 0xb3 -> ARABIC LETTER BEH ISOLATED FORM - u'\ufe91' # 0xb4 -> ARABIC LETTER BEH INITIAL FORM - u'\ufb56' # 0xb5 -> ARABIC LETTER PEH ISOLATED FORM - u'\ufb58' # 0xb6 -> ARABIC LETTER PEH INITIAL FORM - u'\ufe93' # 0xb7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM - u'\ufe95' # 0xb8 -> ARABIC LETTER TEH ISOLATED FORM - u'\ufe97' # 0xb9 -> ARABIC LETTER TEH INITIAL FORM - u'\ufb66' # 0xba -> ARABIC LETTER TTEH ISOLATED FORM - u'\ufb68' # 0xbb -> ARABIC LETTER TTEH INITIAL FORM - u'\ufe99' # 0xbc -> ARABIC LETTER THEH ISOLATED FORM - u'\ufe9b' # 0xbd -> ARABIC LETTER THEH INITIAL FORM - u'\ufe9d' # 0xbe -> ARABIC LETTER JEEM ISOLATED FORM - u'\ufe9f' # 0xbf -> ARABIC LETTER JEEM INITIAL FORM - u'\ufb7a' # 0xc0 -> ARABIC LETTER TCHEH ISOLATED FORM - u'\ufb7c' # 0xc1 -> ARABIC LETTER TCHEH INITIAL FORM - u'\ufea1' # 0xc2 -> ARABIC LETTER HAH ISOLATED FORM - u'\ufea3' # 0xc3 -> ARABIC LETTER HAH INITIAL FORM - u'\ufea5' # 0xc4 -> ARABIC LETTER KHAH ISOLATED FORM - u'\ufea7' # 0xc5 -> ARABIC LETTER KHAH INITIAL FORM - u'\ufea9' # 0xc6 -> ARABIC LETTER DAL ISOLATED FORM - u'\ufb84' # 0xc7 -> ARABIC LETTER DAHAL 
ISOLATED FORMN - u'\ufeab' # 0xc8 -> ARABIC LETTER THAL ISOLATED FORM - u'\ufead' # 0xc9 -> ARABIC LETTER REH ISOLATED FORM - u'\ufb8c' # 0xca -> ARABIC LETTER RREH ISOLATED FORM - u'\ufeaf' # 0xcb -> ARABIC LETTER ZAIN ISOLATED FORM - u'\ufb8a' # 0xcc -> ARABIC LETTER JEH ISOLATED FORM - u'\ufeb1' # 0xcd -> ARABIC LETTER SEEN ISOLATED FORM - u'\ufeb3' # 0xce -> ARABIC LETTER SEEN INITIAL FORM - u'\ufeb5' # 0xcf -> ARABIC LETTER SHEEN ISOLATED FORM - u'\ufeb7' # 0xd0 -> ARABIC LETTER SHEEN INITIAL FORM - u'\ufeb9' # 0xd1 -> ARABIC LETTER SAD ISOLATED FORM - u'\ufebb' # 0xd2 -> ARABIC LETTER SAD INITIAL FORM - u'\ufebd' # 0xd3 -> ARABIC LETTER DAD ISOLATED FORM - u'\ufebf' # 0xd4 -> ARABIC LETTER DAD INITIAL FORM - u'\ufec1' # 0xd5 -> ARABIC LETTER TAH ISOLATED FORM - u'\ufec5' # 0xd6 -> ARABIC LETTER ZAH ISOLATED FORM - u'\ufec9' # 0xd7 -> ARABIC LETTER AIN ISOLATED FORM - u'\ufeca' # 0xd8 -> ARABIC LETTER AIN FINAL FORM - u'\ufecb' # 0xd9 -> ARABIC LETTER AIN INITIAL FORM - u'\ufecc' # 0xda -> ARABIC LETTER AIN MEDIAL FORM - u'\ufecd' # 0xdb -> ARABIC LETTER GHAIN ISOLATED FORM - u'\ufece' # 0xdc -> ARABIC LETTER GHAIN FINAL FORM - u'\ufecf' # 0xdd -> ARABIC LETTER GHAIN INITIAL FORM - u'\ufed0' # 0xde -> ARABIC LETTER GHAIN MEDIAL FORM - u'\ufed1' # 0xdf -> ARABIC LETTER FEH ISOLATED FORM - u'\ufed3' # 0xe0 -> ARABIC LETTER FEH INITIAL FORM - u'\ufed5' # 0xe1 -> ARABIC LETTER QAF ISOLATED FORM - u'\ufed7' # 0xe2 -> ARABIC LETTER QAF INITIAL FORM - u'\ufed9' # 0xe3 -> ARABIC LETTER KAF ISOLATED FORM - u'\ufedb' # 0xe4 -> ARABIC LETTER KAF INITIAL FORM - u'\ufb92' # 0xe5 -> ARABIC LETTER GAF ISOLATED FORM - u'\ufb94' # 0xe6 -> ARABIC LETTER GAF INITIAL FORM - u'\ufedd' # 0xe7 -> ARABIC LETTER LAM ISOLATED FORM - u'\ufedf' # 0xe8 -> ARABIC LETTER LAM INITIAL FORM - u'\ufee0' # 0xe9 -> ARABIC LETTER LAM MEDIAL FORM - u'\ufee1' # 0xea -> ARABIC LETTER MEEM ISOLATED FORM - u'\ufee3' # 0xeb -> ARABIC LETTER MEEM INITIAL FORM - u'\ufb9e' # 0xec -> ARABIC LETTER NOON 
GHUNNA ISOLATED FORM - u'\ufee5' # 0xed -> ARABIC LETTER NOON ISOLATED FORM - u'\ufee7' # 0xee -> ARABIC LETTER NOON INITIAL FORM - u'\ufe85' # 0xef -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - u'\ufeed' # 0xf0 -> ARABIC LETTER WAW ISOLATED FORM - u'\ufba6' # 0xf1 -> ARABIC LETTER HEH GOAL ISOLATED FORM - u'\ufba8' # 0xf2 -> ARABIC LETTER HEH GOAL INITIAL FORM - u'\ufba9' # 0xf3 -> ARABIC LETTER HEH GOAL MEDIAL FORM - u'\ufbaa' # 0xf4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - u'\ufe80' # 0xf5 -> ARABIC LETTER HAMZA ISOLATED FORM - u'\ufe89' # 0xf6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - u'\ufe8a' # 0xf7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - u'\ufe8b' # 0xf8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - u'\ufef1' # 0xf9 -> ARABIC LETTER YEH ISOLATED FORM - u'\ufef2' # 0xfa -> ARABIC LETTER YEH FINAL FORM - u'\ufef3' # 0xfb -> ARABIC LETTER YEH INITIAL FORM - u'\ufbb0' # 0xfc -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - u'\ufbae' # 0xfd -> ARABIC LETTER YEH BARREE ISOLATED FORM - u'\ufe7c' # 0xfe -> ARABIC SHADDA ISOLATED FORM - u'\ufe7d' # 0xff -> ARABIC SHADDA MEDIAL FORM + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO + u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE + u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO + u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE + u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR + u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE + u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX + u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN + u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT + u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE + u'\u060c' # 0xAB -> ARABIC COMMA + u'\u061b' # 0xAC -> ARABIC SEMICOLON + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u061f' # 0xAE -> ARABIC 
QUESTION MARK + u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM + u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM + u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM + u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM + u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM + u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM + u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM + u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM + u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM + u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM + u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM + u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM + u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM + u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN + u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM + u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM + u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM + u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM + u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM + 
u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM + u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM + u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM + u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM + u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM + u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM + u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM + u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM + u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM + u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM + u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM + u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM + u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM + u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM + u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM + u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM + u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM + u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM + u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM 
+ u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM + u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM + u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM + u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM + u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM + u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 
0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 
0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,105 +447,106 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 
0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00ad: 0xad, # SOFT HYPHEN - 0x060c: 0xab, # ARABIC COMMA - 0x061b: 0xac, # ARABIC SEMICOLON - 0x061f: 0xae, # ARABIC QUESTION MARK - 0x06f0: 0xa1, # EXTENDED ARABIC-INDIC DIGIT ZERO - 0x06f1: 0xa2, # EXTENDED ARABIC-INDIC DIGIT ONE - 0x06f2: 0xa3, # EXTENDED ARABIC-INDIC DIGIT TWO - 0x06f3: 0xa4, # EXTENDED ARABIC-INDIC DIGIT THREE - 0x06f4: 0xa5, # EXTENDED ARABIC-INDIC DIGIT FOUR - 0x06f5: 0xa6, # EXTENDED ARABIC-INDIC DIGIT FIVE - 0x06f6: 0xa7, # EXTENDED ARABIC-INDIC DIGIT SIX - 0x06f7: 0xa8, # EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x06f8: 0xa9, # EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x06f9: 0xaa, # EXTENDED ARABIC-INDIC DIGIT NINE - 0xfb56: 0xb5, # ARABIC LETTER PEH ISOLATED FORM - 0xfb58: 0xb6, # ARABIC LETTER PEH INITIAL FORM - 0xfb66: 0xba, # ARABIC LETTER TTEH ISOLATED FORM - 0xfb68: 0xbb, # ARABIC LETTER TTEH INITIAL FORM - 0xfb7a: 0xc0, # ARABIC LETTER TCHEH ISOLATED FORM - 0xfb7c: 0xc1, # ARABIC LETTER TCHEH INITIAL FORM - 0xfb84: 0xc7, # ARABIC LETTER DAHAL ISOLATED FORMN - 0xfb8a: 0xcc, # ARABIC LETTER JEH ISOLATED FORM - 0xfb8c: 0xca, # ARABIC LETTER RREH ISOLATED FORM - 0xfb92: 0xe5, # ARABIC LETTER GAF ISOLATED FORM - 0xfb94: 0xe6, # ARABIC LETTER GAF INITIAL FORM - 0xfb9e: 0xec, # ARABIC LETTER NOON GHUNNA ISOLATED FORM - 0xfba6: 0xf1, # ARABIC LETTER HEH GOAL ISOLATED FORM - 0xfba8: 0xf2, # ARABIC LETTER HEH GOAL INITIAL FORM - 0xfba9: 0xf3, # ARABIC LETTER HEH GOAL MEDIAL FORM - 0xfbaa: 0xf4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - 0xfbae: 0xfd, # ARABIC LETTER YEH BARREE ISOLATED FORM - 0xfbb0: 0xfc, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - 0xfe7c: 0xfe, # ARABIC SHADDA ISOLATED FORM - 0xfe7d: 0xff, # ARABIC SHADDA MEDIAL FORM - 0xfe80: 0xf5, # ARABIC LETTER HAMZA ISOLATED FORM - 0xfe81: 0xaf, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0xfe85: 0xef, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0xfe89: 0xf6, # ARABIC LETTER YEH WITH 
HAMZA ABOVE ISOLATED FORM - 0xfe8a: 0xf7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - 0xfe8b: 0xf8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0xfe8d: 0xb0, # ARABIC LETTER ALEF ISOLATED FORM - 0xfe8e: None, # ARABIC LETTER ALEF FINAL FORM - 0xfe8f: 0xb3, # ARABIC LETTER BEH ISOLATED FORM - 0xfe91: 0xb4, # ARABIC LETTER BEH INITIAL FORM - 0xfe93: 0xb7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0xfe95: 0xb8, # ARABIC LETTER TEH ISOLATED FORM - 0xfe97: 0xb9, # ARABIC LETTER TEH INITIAL FORM - 0xfe99: 0xbc, # ARABIC LETTER THEH ISOLATED FORM - 0xfe9b: 0xbd, # ARABIC LETTER THEH INITIAL FORM - 0xfe9d: 0xbe, # ARABIC LETTER JEEM ISOLATED FORM - 0xfe9f: 0xbf, # ARABIC LETTER JEEM INITIAL FORM - 0xfea1: 0xc2, # ARABIC LETTER HAH ISOLATED FORM - 0xfea3: 0xc3, # ARABIC LETTER HAH INITIAL FORM - 0xfea5: 0xc4, # ARABIC LETTER KHAH ISOLATED FORM - 0xfea7: 0xc5, # ARABIC LETTER KHAH INITIAL FORM - 0xfea9: 0xc6, # ARABIC LETTER DAL ISOLATED FORM - 0xfeab: 0xc8, # ARABIC LETTER THAL ISOLATED FORM - 0xfead: 0xc9, # ARABIC LETTER REH ISOLATED FORM - 0xfeaf: 0xcb, # ARABIC LETTER ZAIN ISOLATED FORM - 0xfeb1: 0xcd, # ARABIC LETTER SEEN ISOLATED FORM - 0xfeb3: 0xce, # ARABIC LETTER SEEN INITIAL FORM - 0xfeb5: 0xcf, # ARABIC LETTER SHEEN ISOLATED FORM - 0xfeb7: 0xd0, # ARABIC LETTER SHEEN INITIAL FORM - 0xfeb9: 0xd1, # ARABIC LETTER SAD ISOLATED FORM - 0xfebb: 0xd2, # ARABIC LETTER SAD INITIAL FORM - 0xfebd: 0xd3, # ARABIC LETTER DAD ISOLATED FORM - 0xfebf: 0xd4, # ARABIC LETTER DAD INITIAL FORM - 0xfec1: 0xd5, # ARABIC LETTER TAH ISOLATED FORM - 0xfec5: 0xd6, # ARABIC LETTER ZAH ISOLATED FORM - 0xfec9: 0xd7, # ARABIC LETTER AIN ISOLATED FORM - 0xfeca: 0xd8, # ARABIC LETTER AIN FINAL FORM - 0xfecb: 0xd9, # ARABIC LETTER AIN INITIAL FORM - 0xfecc: 0xda, # ARABIC LETTER AIN MEDIAL FORM - 0xfecd: 0xdb, # ARABIC LETTER GHAIN ISOLATED FORM - 0xfece: 0xdc, # ARABIC LETTER GHAIN FINAL FORM - 0xfecf: 0xdd, # ARABIC LETTER GHAIN INITIAL FORM - 0xfed0: 0xde, # ARABIC LETTER 
GHAIN MEDIAL FORM - 0xfed1: 0xdf, # ARABIC LETTER FEH ISOLATED FORM - 0xfed3: 0xe0, # ARABIC LETTER FEH INITIAL FORM - 0xfed5: 0xe1, # ARABIC LETTER QAF ISOLATED FORM - 0xfed7: 0xe2, # ARABIC LETTER QAF INITIAL FORM - 0xfed9: 0xe3, # ARABIC LETTER KAF ISOLATED FORM - 0xfedb: 0xe4, # ARABIC LETTER KAF INITIAL FORM - 0xfedd: 0xe7, # ARABIC LETTER LAM ISOLATED FORM - 0xfedf: 0xe8, # ARABIC LETTER LAM INITIAL FORM - 0xfee0: 0xe9, # ARABIC LETTER LAM MEDIAL FORM - 0xfee1: 0xea, # ARABIC LETTER MEEM ISOLATED FORM - 0xfee3: 0xeb, # ARABIC LETTER MEEM INITIAL FORM - 0xfee5: 0xed, # ARABIC LETTER NOON ISOLATED FORM - 0xfee7: 0xee, # ARABIC LETTER NOON INITIAL FORM - 0xfeed: 0xf0, # ARABIC LETTER WAW ISOLATED FORM - 0xfef1: 0xf9, # ARABIC LETTER YEH ISOLATED FORM - 0xfef2: 0xfa, # ARABIC LETTER YEH FINAL FORM - 0xfef3: 0xfb, # ARABIC LETTER YEH INITIAL FORM -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00AD: 0xAD, # SOFT HYPHEN + 0x060C: 0xAB, # ARABIC COMMA + 0x061B: 0xAC, # ARABIC SEMICOLON + 0x061F: 0xAE, # ARABIC QUESTION MARK + 0x06F0: 0xA1, # EXTENDED ARABIC-INDIC DIGIT ZERO + 0x06F1: 0xA2, # EXTENDED ARABIC-INDIC DIGIT ONE + 0x06F2: 0xA3, # EXTENDED ARABIC-INDIC DIGIT TWO + 0x06F3: 0xA4, # EXTENDED ARABIC-INDIC DIGIT THREE + 0x06F4: 0xA5, # EXTENDED ARABIC-INDIC DIGIT FOUR + 0x06F5: 0xA6, # EXTENDED ARABIC-INDIC DIGIT FIVE + 0x06F6: 0xA7, # EXTENDED ARABIC-INDIC DIGIT SIX + 0x06F7: 0xA8, # EXTENDED ARABIC-INDIC DIGIT SEVEN + 0x06F8: 0xA9, # EXTENDED ARABIC-INDIC DIGIT EIGHT + 0x06F9: 0xAA, # EXTENDED ARABIC-INDIC DIGIT NINE + 0xFB56: 0xB5, # ARABIC LETTER PEH ISOLATED FORM + 0xFB58: 0xB6, # ARABIC LETTER PEH INITIAL FORM + 0xFB66: 0xBA, # ARABIC LETTER TTEH ISOLATED FORM + 0xFB68: 0xBB, # ARABIC LETTER TTEH INITIAL FORM + 0xFB7A: 0xC0, # ARABIC LETTER TCHEH ISOLATED FORM + 0xFB7C: 0xC1, # ARABIC LETTER TCHEH INITIAL FORM + 0xFB84: 0xC7, # 
ARABIC LETTER DAHAL ISOLATED FORMN + 0xFB8A: 0xCC, # ARABIC LETTER JEH ISOLATED FORM + 0xFB8C: 0xCA, # ARABIC LETTER RREH ISOLATED FORM + 0xFB92: 0xE5, # ARABIC LETTER GAF ISOLATED FORM + 0xFB94: 0xE6, # ARABIC LETTER GAF INITIAL FORM + 0xFB9E: 0xEC, # ARABIC LETTER NOON GHUNNA ISOLATED FORM + 0xFBA6: 0xF1, # ARABIC LETTER HEH GOAL ISOLATED FORM + 0xFBA8: 0xF2, # ARABIC LETTER HEH GOAL INITIAL FORM + 0xFBA9: 0xF3, # ARABIC LETTER HEH GOAL MEDIAL FORM + 0xFBAA: 0xF4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + 0xFBAE: 0xFD, # ARABIC LETTER YEH BARREE ISOLATED FORM + 0xFBB0: 0xFC, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + 0xFE7C: 0xFE, # ARABIC SHADDA ISOLATED FORM + 0xFE7D: 0xFF, # ARABIC SHADDA MEDIAL FORM + 0xFE80: 0xF5, # ARABIC LETTER HAMZA ISOLATED FORM + 0xFE81: 0xAF, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0xFE85: 0xEF, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xFE89: 0xF6, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + 0xFE8A: 0xF7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + 0xFE8B: 0xF8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xFE8D: 0xB0, # ARABIC LETTER ALEF ISOLATED FORM + 0xFE8E: None, # ARABIC LETTER ALEF FINAL FORM + 0xFE8F: 0xB3, # ARABIC LETTER BEH ISOLATED FORM + 0xFE91: 0xB4, # ARABIC LETTER BEH INITIAL FORM + 0xFE93: 0xB7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xFE95: 0xB8, # ARABIC LETTER TEH ISOLATED FORM + 0xFE97: 0xB9, # ARABIC LETTER TEH INITIAL FORM + 0xFE99: 0xBC, # ARABIC LETTER THEH ISOLATED FORM + 0xFE9B: 0xBD, # ARABIC LETTER THEH INITIAL FORM + 0xFE9D: 0xBE, # ARABIC LETTER JEEM ISOLATED FORM + 0xFE9F: 0xBF, # ARABIC LETTER JEEM INITIAL FORM + 0xFEA1: 0xC2, # ARABIC LETTER HAH ISOLATED FORM + 0xFEA3: 0xC3, # ARABIC LETTER HAH INITIAL FORM + 0xFEA5: 0xC4, # ARABIC LETTER KHAH ISOLATED FORM + 0xFEA7: 0xC5, # ARABIC LETTER KHAH INITIAL FORM + 0xFEA9: 0xC6, # ARABIC LETTER DAL ISOLATED FORM + 0xFEAB: 0xC8, # ARABIC LETTER THAL ISOLATED FORM + 0xFEAD: 0xC9, # 
ARABIC LETTER REH ISOLATED FORM + 0xFEAF: 0xCB, # ARABIC LETTER ZAIN ISOLATED FORM + 0xFEB1: 0xCD, # ARABIC LETTER SEEN ISOLATED FORM + 0xFEB3: 0xCE, # ARABIC LETTER SEEN INITIAL FORM + 0xFEB5: 0xCF, # ARABIC LETTER SHEEN ISOLATED FORM + 0xFEB7: 0xD0, # ARABIC LETTER SHEEN INITIAL FORM + 0xFEB9: 0xD1, # ARABIC LETTER SAD ISOLATED FORM + 0xFEBB: 0xD2, # ARABIC LETTER SAD INITIAL FORM + 0xFEBD: 0xD3, # ARABIC LETTER DAD ISOLATED FORM + 0xFEBF: 0xD4, # ARABIC LETTER DAD INITIAL FORM + 0xFEC1: 0xD5, # ARABIC LETTER TAH ISOLATED FORM + 0xFEC5: 0xD6, # ARABIC LETTER ZAH ISOLATED FORM + 0xFEC9: 0xD7, # ARABIC LETTER AIN ISOLATED FORM + 0xFECA: 0xD8, # ARABIC LETTER AIN FINAL FORM + 0xFECB: 0xD9, # ARABIC LETTER AIN INITIAL FORM + 0xFECC: 0xDA, # ARABIC LETTER AIN MEDIAL FORM + 0xFECD: 0xDB, # ARABIC LETTER GHAIN ISOLATED FORM + 0xFECE: 0xDC, # ARABIC LETTER GHAIN FINAL FORM + 0xFECF: 0xDD, # ARABIC LETTER GHAIN INITIAL FORM + 0xFED0: 0xDE, # ARABIC LETTER GHAIN MEDIAL FORM + 0xFED1: 0xDF, # ARABIC LETTER FEH ISOLATED FORM + 0xFED3: 0xE0, # ARABIC LETTER FEH INITIAL FORM + 0xFED5: 0xE1, # ARABIC LETTER QAF ISOLATED FORM + 0xFED7: 0xE2, # ARABIC LETTER QAF INITIAL FORM + 0xFED9: 0xE3, # ARABIC LETTER KAF ISOLATED FORM + 0xFEDB: 0xE4, # ARABIC LETTER KAF INITIAL FORM + 0xFEDD: 0xE7, # ARABIC LETTER LAM ISOLATED FORM + 0xFEDF: 0xE8, # ARABIC LETTER LAM INITIAL FORM + 0xFEE0: 0xE9, # ARABIC LETTER LAM MEDIAL FORM + 0xFEE1: 0xEA, # ARABIC LETTER MEEM ISOLATED FORM + 0xFEE3: 0xEB, # ARABIC LETTER MEEM INITIAL FORM + 0xFEE5: 0xED, # ARABIC LETTER NOON ISOLATED FORM + 0xFEE7: 0xEE, # ARABIC LETTER NOON INITIAL FORM + 0xFEED: 0xF0, # ARABIC LETTER WAW ISOLATED FORM + 0xFEF1: 0xF9, # ARABIC LETTER YEH ISOLATED FORM + 0xFEF2: 0xFA, # ARABIC LETTER YEH FINAL FORM + 0xFEF3: 0xFB, # ARABIC LETTER YEH INITIAL FORM +} + Index: cp1026.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1026.py,v retrieving 
revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1026.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1026.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x7f' # 0x07 -> DELETE u'\x97' # 0x08 -> CONTROL u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0a -> CONTROL - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x87' # 0x17 -> CONTROL u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1a -> CONTROL - u'\x8f' # 0x1b -> CONTROL - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> CONTROL u'\x81' # 0x21 -> CONTROL u'\x82' # 0x22 -> CONTROL @@ -74,12 +74,12 @@ u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> CONTROL u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2a -> CONTROL - u'\x8b' # 0x2b -> CONTROL - u'\x8c' # 0x2c -> CONTROL - u'\x05' # 0x2d -> ENQUIRY - u'\x06' # 0x2e -> ACKNOWLEDGE - u'\x07' # 0x2f -> BELL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> CONTROL u'\x91' # 0x31 -> CONTROL u'\x16' # 0x32 -> SYNCHRONOUS IDLE @@ -90,12 +90,12 @@ u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> CONTROL u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3a -> CONTROL - u'\x9b' # 0x3b -> 
CONTROL - u'\x14' # 0x3c -> DEVICE CONTROL FOUR - u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3e -> CONTROL - u'\x1a' # 0x3f -> SUBSTITUTE + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\xa0' # 0x41 -> NO-BREAK SPACE u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX @@ -106,12 +106,12 @@ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE u'{' # 0x48 -> LEFT CURLY BRACKET u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xc7' # 0x4a -> LATIN CAPITAL LETTER C WITH CEDILLA - u'.' # 0x4b -> FULL STOP - u'<' # 0x4c -> LESS-THAN SIGN - u'(' # 0x4d -> LEFT PARENTHESIS - u'+' # 0x4e -> PLUS SIGN - u'!' # 0x4f -> EXCLAMATION MARK + u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' 
# 0x4F -> EXCLAMATION MARK u'&' # 0x50 -> AMPERSAND u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX @@ -122,12 +122,12 @@ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u011e' # 0x5a -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0130' # 0x5b -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'*' # 0x5c -> ASTERISK - u')' # 0x5d -> RIGHT PARENTHESIS - u';' # 0x5e -> SEMICOLON - u'^' # 0x5f -> CIRCUMFLEX ACCENT + u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX @@ -138,12 +138,12 @@ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'[' # 0x68 -> LEFT SQUARE BRACKET u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u015f' # 0x6a -> LATIN SMALL LETTER S WITH CEDILLA - u',' # 0x6b -> COMMA - u'%' # 0x6c -> PERCENT SIGN - u'_' # 0x6d -> LOW LINE - u'>' # 0x6e -> GREATER-THAN SIGN - u'?' # 0x6f -> QUESTION MARK + u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX @@ -154,12 +154,12 @@ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I - u':' # 0x7a -> COLON - u'\xd6' # 0x7b -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015e' # 0x7c -> LATIN CAPITAL LETTER S WITH CEDILLA - u"'" # 0x7d -> APOSTROPHE - u'=' # 0x7e -> EQUALS SIGN - u'\xdc' # 0x7f -> LATIN CAPITAL LETTER U WITH DIAERESIS + u':' # 0x7A -> COLON + u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B @@ -170,12 +170,12 @@ u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'}' # 0x8c -> RIGHT CURLY BRACKET - u'`' # 0x8d -> GRAVE ACCENT - u'\xa6' # 0x8e -> BROKEN BAR - u'\xb1' # 0x8f -> PLUS-MINUS SIGN + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'}' # 0x8C -> RIGHT CURLY BRACKET + u'`' # 0x8D -> GRAVE ACCENT + u'\xa6' # 0x8E -> BROKEN BAR + u'\xb1' # 0x8F -> PLUS-MINUS SIGN u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K @@ -186,108 +186,108 @@ u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9a -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9b -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9c -> LATIN 
SMALL LIGATURE AE - u'\xb8' # 0x9d -> CEDILLA - u'\xc6' # 0x9e -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9f -> CURRENCY SIGN - u'\xb5' # 0xa0 -> MICRO SIGN - u'\xf6' # 0xa1 -> LATIN SMALL LETTER O WITH DIAERESIS - u's' # 0xa2 -> LATIN SMALL LETTER S - u't' # 0xa3 -> LATIN SMALL LETTER T - u'u' # 0xa4 -> LATIN SMALL LETTER U - u'v' # 0xa5 -> LATIN SMALL LETTER V - u'w' # 0xa6 -> LATIN SMALL LETTER W - u'x' # 0xa7 -> LATIN SMALL LETTER X - u'y' # 0xa8 -> LATIN SMALL LETTER Y - u'z' # 0xa9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xaa -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xab -> INVERTED QUESTION MARK - u']' # 0xac -> RIGHT SQUARE BRACKET - u'$' # 0xad -> DOLLAR SIGN - u'@' # 0xae -> COMMERCIAL AT - u'\xae' # 0xaf -> REGISTERED SIGN - u'\xa2' # 0xb0 -> CENT SIGN - u'\xa3' # 0xb1 -> POUND SIGN - u'\xa5' # 0xb2 -> YEN SIGN - u'\xb7' # 0xb3 -> MIDDLE DOT - u'\xa9' # 0xb4 -> COPYRIGHT SIGN - u'\xa7' # 0xb5 -> SECTION SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xba -> NOT SIGN - u'|' # 0xbb -> VERTICAL LINE - u'\xaf' # 0xbc -> MACRON - u'\xa8' # 0xbd -> DIAERESIS - u'\xb4' # 0xbe -> ACUTE ACCENT - u'\xd7' # 0xbf -> MULTIPLICATION SIGN - u'\xe7' # 0xc0 -> LATIN SMALL LETTER C WITH CEDILLA - u'A' # 0xc1 -> LATIN CAPITAL LETTER A - u'B' # 0xc2 -> LATIN CAPITAL LETTER B - u'C' # 0xc3 -> LATIN CAPITAL LETTER C - u'D' # 0xc4 -> LATIN CAPITAL LETTER D - u'E' # 0xc5 -> LATIN CAPITAL LETTER E - u'F' # 0xc6 -> LATIN CAPITAL LETTER F - u'G' # 0xc7 -> LATIN CAPITAL LETTER G - u'H' # 0xc8 -> LATIN CAPITAL LETTER H - u'I' # 0xc9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xca -> SOFT HYPHEN - u'\xf4' # 0xcb -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'~' # 0xcc -> TILDE - u'\xf2' # 0xcd -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xce -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xcf -> LATIN SMALL LETTER O WITH TILDE - u'\u011f' # 0xd0 -> 
LATIN SMALL LETTER G WITH BREVE - u'J' # 0xd1 -> LATIN CAPITAL LETTER J - u'K' # 0xd2 -> LATIN CAPITAL LETTER K - u'L' # 0xd3 -> LATIN CAPITAL LETTER L - u'M' # 0xd4 -> LATIN CAPITAL LETTER M - u'N' # 0xd5 -> LATIN CAPITAL LETTER N - u'O' # 0xd6 -> LATIN CAPITAL LETTER O - u'P' # 0xd7 -> LATIN CAPITAL LETTER P - u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q - u'R' # 0xd9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xda -> SUPERSCRIPT ONE - u'\xfb' # 0xdb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\\' # 0xdc -> REVERSE SOLIDUS - u'\xf9' # 0xdd -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xde -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xdf -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xfc' # 0xe0 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf7' # 0xe1 -> DIVISION SIGN - u'S' # 0xe2 -> LATIN CAPITAL LETTER S - u'T' # 0xe3 -> LATIN CAPITAL LETTER T - u'U' # 0xe4 -> LATIN CAPITAL LETTER U - u'V' # 0xe5 -> LATIN CAPITAL LETTER V - u'W' # 0xe6 -> LATIN CAPITAL LETTER W - u'X' # 0xe7 -> LATIN CAPITAL LETTER X - u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xea -> SUPERSCRIPT TWO - u'\xd4' # 0xeb -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'#' # 0xec -> NUMBER SIGN - u'\xd2' # 0xed -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xef -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xf0 -> DIGIT ZERO - u'1' # 0xf1 -> DIGIT ONE - u'2' # 0xf2 -> DIGIT TWO - u'3' # 0xf3 -> DIGIT THREE - u'4' # 0xf4 -> DIGIT FOUR - u'5' # 0xf5 -> DIGIT FIVE - u'6' # 0xf6 -> DIGIT SIX - u'7' # 0xf7 -> DIGIT SEVEN - u'8' # 0xf8 -> DIGIT EIGHT - u'9' # 0xf9 -> DIGIT NINE - u'\xb3' # 0xfa -> SUPERSCRIPT THREE - u'\xdb' # 0xfb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'"' # 0xfc -> QUOTATION MARK - u'\xd9' # 0xfd -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xfe -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xff -> CONTROL + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE 
ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u']' # 0xAC -> RIGHT SQUARE BRACKET + u'$' # 0xAD -> DOLLAR SIGN + u'@' # 0xAE -> COMMERCIAL AT + u'\xae' # 0xAF -> REGISTERED SIGN + u'\xa2' # 0xB0 -> CENT SIGN + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'\xac' # 0xBA -> NOT SIGN + u'|' # 0xBB -> VERTICAL LINE + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'~' # 0xCC -> TILDE + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN 
SMALL LETTER O WITH TILDE + u'\u011f' # 0xD0 -> LATIN SMALL LETTER G WITH BREVE + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\\' # 0xDC -> REVERSE SOLIDUS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'#' # 0xEC -> NUMBER SIGN + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'"' # 0xFC -> QUOTATION MARK + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map @@ -298,98 
+298,98 @@ 0x0002: 0x02, # START OF TEXT 0x0003: 0x03, # END OF TEXT 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2d, # ENQUIRY - 0x0006: 0x2e, # ACKNOWLEDGE - 0x0007: 0x2f, # BELL + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL 0x0008: 0x16, # BACKSPACE 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000a: 0x25, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3c, # DEVICE CONTROL FOUR - 0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE 0x0016: 0x32, # SYNCHRONOUS IDLE 0x0017: 0x26, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x3f, # SUBSTITUTE - 0x001b: 0x27, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x40, # SPACE - 0x0021: 0x4f, # EXCLAMATION MARK - 0x0022: 0xfc, # QUOTATION MARK - 0x0023: 0xec, # NUMBER SIGN - 0x0024: 0xad, # DOLLAR SIGN - 0x0025: 0x6c, # PERCENT SIGN + 0x0021: 0x4F, # EXCLAMATION MARK + 0x0022: 0xFC, # QUOTATION MARK + 0x0023: 0xEC, # NUMBER SIGN + 0x0024: 0xAD, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7d, # APOSTROPHE - 0x0028: 0x4d, # LEFT PARENTHESIS - 0x0029: 0x5d, # RIGHT PARENTHESIS - 0x002a: 0x5c, # ASTERISK - 0x002b: 
0x4e, # PLUS SIGN - 0x002c: 0x6b, # COMMA - 0x002d: 0x60, # HYPHEN-MINUS - 0x002e: 0x4b, # FULL STOP - 0x002f: 0x61, # SOLIDUS - 0x0030: 0xf0, # DIGIT ZERO - 0x0031: 0xf1, # DIGIT ONE - 0x0032: 0xf2, # DIGIT TWO - 0x0033: 0xf3, # DIGIT THREE - 0x0034: 0xf4, # DIGIT FOUR - 0x0035: 0xf5, # DIGIT FIVE - 0x0036: 0xf6, # DIGIT SIX - 0x0037: 0xf7, # DIGIT SEVEN - 0x0038: 0xf8, # DIGIT EIGHT - 0x0039: 0xf9, # DIGIT NINE - 0x003a: 0x7a, # COLON - 0x003b: 0x5e, # SEMICOLON - 0x003c: 0x4c, # LESS-THAN SIGN - 0x003d: 0x7e, # EQUALS SIGN - 0x003e: 0x6e, # GREATER-THAN SIGN - 0x003f: 0x6f, # QUESTION MARK - 0x0040: 0xae, # COMMERCIAL AT - 0x0041: 0xc1, # LATIN CAPITAL LETTER A - 0x0042: 0xc2, # LATIN CAPITAL LETTER B - 0x0043: 0xc3, # LATIN CAPITAL LETTER C - 0x0044: 0xc4, # LATIN CAPITAL LETTER D - 0x0045: 0xc5, # LATIN CAPITAL LETTER E - 0x0046: 0xc6, # LATIN CAPITAL LETTER F - 0x0047: 0xc7, # LATIN CAPITAL LETTER G - 0x0048: 0xc8, # LATIN CAPITAL LETTER H - 0x0049: 0xc9, # LATIN CAPITAL LETTER I - 0x004a: 0xd1, # LATIN CAPITAL LETTER J - 0x004b: 0xd2, # LATIN CAPITAL LETTER K - 0x004c: 0xd3, # LATIN CAPITAL LETTER L - 0x004d: 0xd4, # LATIN CAPITAL LETTER M - 0x004e: 0xd5, # LATIN CAPITAL LETTER N - 0x004f: 0xd6, # LATIN CAPITAL LETTER O - 0x0050: 0xd7, # LATIN CAPITAL LETTER P - 0x0051: 0xd8, # LATIN CAPITAL LETTER Q - 0x0052: 0xd9, # LATIN CAPITAL LETTER R - 0x0053: 0xe2, # LATIN CAPITAL LETTER S - 0x0054: 0xe3, # LATIN CAPITAL LETTER T - 0x0055: 0xe4, # LATIN CAPITAL LETTER U - 0x0056: 0xe5, # LATIN CAPITAL LETTER V - 0x0057: 0xe6, # LATIN CAPITAL LETTER W - 0x0058: 0xe7, # LATIN CAPITAL LETTER X - 0x0059: 0xe8, # LATIN CAPITAL LETTER Y - 0x005a: 0xe9, # LATIN CAPITAL LETTER Z - 0x005b: 0x68, # LEFT SQUARE BRACKET - 0x005c: 0xdc, # REVERSE SOLIDUS - 0x005d: 0xac, # RIGHT SQUARE BRACKET - 0x005e: 0x5f, # CIRCUMFLEX ACCENT - 0x005f: 0x6d, # LOW LINE - 0x0060: 0x8d, # GRAVE ACCENT + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT 
PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0xAE, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0x68, # LEFT SQUARE BRACKET + 0x005C: 0xDC, # REVERSE SOLIDUS + 0x005D: 0xAC, # RIGHT SQUARE BRACKET + 0x005E: 0x5F, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE + 0x0060: 0x8D, # GRAVE ACCENT 0x0061: 0x81, # LATIN SMALL LETTER A 0x0062: 
0x82, # LATIN SMALL LETTER B 0x0063: 0x83, # LATIN SMALL LETTER C @@ -399,28 +399,28 @@ 0x0067: 0x87, # LATIN SMALL LETTER G 0x0068: 0x88, # LATIN SMALL LETTER H 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006a: 0x91, # LATIN SMALL LETTER J - 0x006b: 0x92, # LATIN SMALL LETTER K - 0x006c: 0x93, # LATIN SMALL LETTER L - 0x006d: 0x94, # LATIN SMALL LETTER M - 0x006e: 0x95, # LATIN SMALL LETTER N - 0x006f: 0x96, # LATIN SMALL LETTER O + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O 0x0070: 0x97, # LATIN SMALL LETTER P 0x0071: 0x98, # LATIN SMALL LETTER Q 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xa2, # LATIN SMALL LETTER S - 0x0074: 0xa3, # LATIN SMALL LETTER T - 0x0075: 0xa4, # LATIN SMALL LETTER U - 0x0076: 0xa5, # LATIN SMALL LETTER V - 0x0077: 0xa6, # LATIN SMALL LETTER W - 0x0078: 0xa7, # LATIN SMALL LETTER X - 0x0079: 0xa8, # LATIN SMALL LETTER Y - 0x007a: 0xa9, # LATIN SMALL LETTER Z - 0x007b: 0x48, # LEFT CURLY BRACKET - 0x007c: 0xbb, # VERTICAL LINE - 0x007d: 0x8c, # RIGHT CURLY BRACKET - 0x007e: 0xcc, # TILDE - 0x007f: 0x07, # DELETE + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0x48, # LEFT CURLY BRACKET + 0x007C: 0xBB, # VERTICAL LINE + 0x007D: 0x8C, # RIGHT CURLY BRACKET + 0x007E: 0xCC, # TILDE + 0x007F: 0x07, # DELETE 0x0080: 0x20, # CONTROL 0x0081: 0x21, # CONTROL 0x0082: 0x22, # CONTROL @@ -431,15 +431,15 @@ 0x0087: 0x17, # CONTROL 0x0088: 0x28, # CONTROL 0x0089: 0x29, # CONTROL - 0x008a: 0x2a, # CONTROL - 0x008b: 0x2b, # CONTROL - 0x008c: 0x2c, # CONTROL - 0x008d: 0x09, # CONTROL - 0x008e: 0x0a, 
# CONTROL - 0x008f: 0x1b, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL 0x0090: 0x30, # CONTROL 0x0091: 0x31, # CONTROL - 0x0092: 0x1a, # CONTROL + 0x0092: 0x1A, # CONTROL 0x0093: 0x33, # CONTROL 0x0094: 0x34, # CONTROL 0x0095: 0x35, # CONTROL @@ -447,106 +447,107 @@ 0x0097: 0x08, # CONTROL 0x0098: 0x38, # CONTROL 0x0099: 0x39, # CONTROL - 0x009a: 0x3a, # CONTROL - 0x009b: 0x3b, # CONTROL - 0x009c: 0x04, # CONTROL - 0x009d: 0x14, # CONTROL - 0x009e: 0x3e, # CONTROL - 0x009f: 0xff, # CONTROL - 0x00a0: 0x41, # NO-BREAK SPACE - 0x00a1: 0xaa, # INVERTED EXCLAMATION MARK - 0x00a2: 0xb0, # CENT SIGN - 0x00a3: 0xb1, # POUND SIGN - 0x00a4: 0x9f, # CURRENCY SIGN - 0x00a5: 0xb2, # YEN SIGN - 0x00a6: 0x8e, # BROKEN BAR - 0x00a7: 0xb5, # SECTION SIGN - 0x00a8: 0xbd, # DIAERESIS - 0x00a9: 0xb4, # COPYRIGHT SIGN - 0x00aa: 0x9a, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xba, # NOT SIGN - 0x00ad: 0xca, # SOFT HYPHEN - 0x00ae: 0xaf, # REGISTERED SIGN - 0x00af: 0xbc, # MACRON - 0x00b0: 0x90, # DEGREE SIGN - 0x00b1: 0x8f, # PLUS-MINUS SIGN - 0x00b2: 0xea, # SUPERSCRIPT TWO - 0x00b3: 0xfa, # SUPERSCRIPT THREE - 0x00b4: 0xbe, # ACUTE ACCENT - 0x00b5: 0xa0, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb3, # MIDDLE DOT - 0x00b8: 0x9d, # CEDILLA - 0x00b9: 0xda, # SUPERSCRIPT ONE - 0x00ba: 0x9b, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xb8, # VULGAR FRACTION ONE HALF - 0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xab, # INVERTED QUESTION MARK - 0x00c0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x63, # LATIN CAPITAL 
LETTER A WITH DIAERESIS - 0x00c5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x9e, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x4a, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xed, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xeb, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xef, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x7b, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xbf, # MULTIPLICATION SIGN - 0x00d8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xfd, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xfe, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xfb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x7f, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x9c, # LATIN SMALL LIGATURE AE - 0x00e7: 0xc0, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x55, # LATIN SMALL LETTER 
I WITH ACUTE - 0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xa1, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xe1, # DIVISION SIGN - 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xe0, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011e: 0x5a, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0xd0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0x5b, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0xB0, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x8E, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xBA, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 
0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x4A, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x7B, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x7F, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN 
SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0xC0, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xA1, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xE0, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0x5A, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xD0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x5B, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x0131: 0x79, # LATIN SMALL LETTER DOTLESS I - 0x015e: 0x7c, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0x6a, # LATIN SMALL LETTER S WITH CEDILLA -} \ No newline at end of file + 0x015E: 0x7C, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0x6A, # LATIN SMALL LETTER S WITH CEDILLA +} + Index: cp1140.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1140.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- cp1140.py 24 Oct 2005 12:07:48 -0000 1.3 +++ cp1140.py 24 Oct 2005 
12:14:59 -0000 1.4 @@ -42,12 +42,12 @@ u'\x7f' # 0x07 -> DELETE u'\x97' # 0x08 -> CONTROL u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0a -> CONTROL - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x87' # 0x17 -> CONTROL u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1a -> CONTROL - u'\x8f' # 0x1b -> CONTROL - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> CONTROL u'\x81' # 0x21 -> CONTROL u'\x82' # 0x22 -> CONTROL @@ -74,12 +74,12 @@ u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> CONTROL u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2a -> CONTROL - u'\x8b' # 0x2b -> CONTROL - u'\x8c' # 0x2c -> CONTROL - u'\x05' # 0x2d -> ENQUIRY - u'\x06' # 0x2e -> ACKNOWLEDGE - u'\x07' # 0x2f -> BELL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> CONTROL u'\x91' # 0x31 -> CONTROL u'\x16' # 0x32 -> SYNCHRONOUS IDLE @@ -90,12 +90,12 @@ u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> CONTROL u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3a -> CONTROL - u'\x9b' # 0x3b -> CONTROL - u'\x14' # 0x3c -> DEVICE CONTROL FOUR - u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3e -> CONTROL - u'\x1a' # 0x3f 
-> SUBSTITUTE + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\xa0' # 0x41 -> NO-BREAK SPACE u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX @@ -106,12 +106,12 @@ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4a -> CENT SIGN - u'.' # 0x4b -> FULL STOP - u'<' # 0x4c -> LESS-THAN SIGN - u'(' # 0x4d -> LEFT PARENTHESIS - u'+' # 0x4e -> PLUS SIGN - u'|' # 0x4f -> VERTICAL LINE + u'\xa2' # 0x4A -> CENT SIGN + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE u'&' # 0x50 -> AMPERSAND u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX @@ -122,12 +122,12 @@ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' # 0x5a -> EXCLAMATION MARK - u'$' # 0x5b -> DOLLAR SIGN - u'*' # 0x5c -> ASTERISK - u')' # 0x5d -> RIGHT PARENTHESIS - u';' # 0x5e -> SEMICOLON - u'\xac' # 0x5f -> NOT SIGN + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX @@ -138,12 +138,12 @@ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6a -> BROKEN BAR - u',' # 0x6b -> COMMA - u'%' # 0x6c -> PERCENT SIGN - u'_' # 0x6d -> LOW LINE - u'>' # 0x6e -> GREATER-THAN SIGN - u'?' 
# 0x6f -> QUESTION MARK + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX @@ -154,12 +154,12 @@ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7a -> COLON - u'#' # 0x7b -> NUMBER SIGN - u'@' # 0x7c -> COMMERCIAL AT - u"'" # 0x7d -> APOSTROPHE - u'=' # 0x7e -> EQUALS SIGN - u'"' # 0x7f -> QUOTATION MARK + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B @@ -170,12 +170,12 @@ u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8c -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8d -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8e -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8f -> PLUS-MINUS SIGN + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K @@ -186,108 +186,108 @@ u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER 
R - u'\xaa' # 0x9a -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9b -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9c -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9d -> CEDILLA - u'\xc6' # 0x9e -> LATIN CAPITAL LIGATURE AE - u'\u20ac' # 0x9f -> EURO SIGN - u'\xb5' # 0xa0 -> MICRO SIGN - u'~' # 0xa1 -> TILDE - u's' # 0xa2 -> LATIN SMALL LETTER S - u't' # 0xa3 -> LATIN SMALL LETTER T - u'u' # 0xa4 -> LATIN SMALL LETTER U - u'v' # 0xa5 -> LATIN SMALL LETTER V - u'w' # 0xa6 -> LATIN SMALL LETTER W - u'x' # 0xa7 -> LATIN SMALL LETTER X - u'y' # 0xa8 -> LATIN SMALL LETTER Y - u'z' # 0xa9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xaa -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xab -> INVERTED QUESTION MARK - u'\xd0' # 0xac -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xad -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xae -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xaf -> REGISTERED SIGN - u'^' # 0xb0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xb1 -> POUND SIGN - u'\xa5' # 0xb2 -> YEN SIGN - u'\xb7' # 0xb3 -> MIDDLE DOT - u'\xa9' # 0xb4 -> COPYRIGHT SIGN - u'\xa7' # 0xb5 -> SECTION SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xba -> LEFT SQUARE BRACKET - u']' # 0xbb -> RIGHT SQUARE BRACKET - u'\xaf' # 0xbc -> MACRON - u'\xa8' # 0xbd -> DIAERESIS - u'\xb4' # 0xbe -> ACUTE ACCENT - u'\xd7' # 0xbf -> MULTIPLICATION SIGN - u'{' # 0xc0 -> LEFT CURLY BRACKET - u'A' # 0xc1 -> LATIN CAPITAL LETTER A - u'B' # 0xc2 -> LATIN CAPITAL LETTER B - u'C' # 0xc3 -> LATIN CAPITAL LETTER C - u'D' # 0xc4 -> LATIN CAPITAL LETTER D - u'E' # 0xc5 -> LATIN CAPITAL LETTER E - u'F' # 0xc6 -> LATIN CAPITAL LETTER F - u'G' # 0xc7 -> LATIN CAPITAL LETTER G - u'H' # 0xc8 -> LATIN CAPITAL LETTER H - u'I' # 0xc9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xca -> SOFT HYPHEN - u'\xf4' # 0xcb -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xcc -> LATIN SMALL LETTER 
O WITH DIAERESIS - u'\xf2' # 0xcd -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xce -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xcf -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xd0 -> RIGHT CURLY BRACKET - u'J' # 0xd1 -> LATIN CAPITAL LETTER J - u'K' # 0xd2 -> LATIN CAPITAL LETTER K - u'L' # 0xd3 -> LATIN CAPITAL LETTER L - u'M' # 0xd4 -> LATIN CAPITAL LETTER M - u'N' # 0xd5 -> LATIN CAPITAL LETTER N - u'O' # 0xd6 -> LATIN CAPITAL LETTER O - u'P' # 0xd7 -> LATIN CAPITAL LETTER P - u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q - u'R' # 0xd9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xda -> SUPERSCRIPT ONE - u'\xfb' # 0xdb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xdc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xdd -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xde -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xdf -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xe0 -> REVERSE SOLIDUS - u'\xf7' # 0xe1 -> DIVISION SIGN - u'S' # 0xe2 -> LATIN CAPITAL LETTER S - u'T' # 0xe3 -> LATIN CAPITAL LETTER T - u'U' # 0xe4 -> LATIN CAPITAL LETTER U - u'V' # 0xe5 -> LATIN CAPITAL LETTER V - u'W' # 0xe6 -> LATIN CAPITAL LETTER W - u'X' # 0xe7 -> LATIN CAPITAL LETTER X - u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xea -> SUPERSCRIPT TWO - u'\xd4' # 0xeb -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xec -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xed -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xef -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xf0 -> DIGIT ZERO - u'1' # 0xf1 -> DIGIT ONE - u'2' # 0xf2 -> DIGIT TWO - u'3' # 0xf3 -> DIGIT THREE - u'4' # 0xf4 -> DIGIT FOUR - u'5' # 0xf5 -> DIGIT FIVE - u'6' # 0xf6 -> DIGIT SIX - u'7' # 0xf7 -> DIGIT SEVEN - u'8' # 0xf8 -> DIGIT EIGHT - u'9' # 0xf9 -> DIGIT NINE - u'\xb3' # 0xfa -> SUPERSCRIPT THREE - u'\xdb' # 0xfb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xfc -> LATIN CAPITAL LETTER 
U WITH DIAERESIS - u'\xd9' # 0xfd -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xfe -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xff -> CONTROL + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\u20ac' # 0x9F -> EURO SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> 
LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 
-> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map @@ -298,97 +298,97 @@ 0x0002: 0x02, # START OF TEXT 0x0003: 0x03, # END OF TEXT 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2d, # ENQUIRY - 0x0006: 0x2e, # ACKNOWLEDGE - 0x0007: 0x2f, # BELL + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL 0x0008: 0x16, # BACKSPACE 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000a: 0x25, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3c, # DEVICE CONTROL FOUR - 0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE 0x0016: 0x32, # SYNCHRONOUS IDLE 0x0017: 0x26, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x3f, # SUBSTITUTE - 0x001b: 0x27, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x40, # SPACE - 0x0021: 0x5a, # EXCLAMATION MARK - 0x0022: 0x7f, # QUOTATION MARK - 0x0023: 0x7b, # NUMBER SIGN - 0x0024: 0x5b, # DOLLAR SIGN - 0x0025: 0x6c, # 
PERCENT SIGN + 0x0021: 0x5A, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7d, # APOSTROPHE - 0x0028: 0x4d, # LEFT PARENTHESIS - 0x0029: 0x5d, # RIGHT PARENTHESIS - 0x002a: 0x5c, # ASTERISK - 0x002b: 0x4e, # PLUS SIGN - 0x002c: 0x6b, # COMMA - 0x002d: 0x60, # HYPHEN-MINUS - 0x002e: 0x4b, # FULL STOP - 0x002f: 0x61, # SOLIDUS - 0x0030: 0xf0, # DIGIT ZERO - 0x0031: 0xf1, # DIGIT ONE - 0x0032: 0xf2, # DIGIT TWO - 0x0033: 0xf3, # DIGIT THREE - 0x0034: 0xf4, # DIGIT FOUR - 0x0035: 0xf5, # DIGIT FIVE - 0x0036: 0xf6, # DIGIT SIX - 0x0037: 0xf7, # DIGIT SEVEN - 0x0038: 0xf8, # DIGIT EIGHT - 0x0039: 0xf9, # DIGIT NINE - 0x003a: 0x7a, # COLON - 0x003b: 0x5e, # SEMICOLON - 0x003c: 0x4c, # LESS-THAN SIGN - 0x003d: 0x7e, # EQUALS SIGN - 0x003e: 0x6e, # GREATER-THAN SIGN - 0x003f: 0x6f, # QUESTION MARK - 0x0040: 0x7c, # COMMERCIAL AT - 0x0041: 0xc1, # LATIN CAPITAL LETTER A - 0x0042: 0xc2, # LATIN CAPITAL LETTER B - 0x0043: 0xc3, # LATIN CAPITAL LETTER C - 0x0044: 0xc4, # LATIN CAPITAL LETTER D - 0x0045: 0xc5, # LATIN CAPITAL LETTER E - 0x0046: 0xc6, # LATIN CAPITAL LETTER F - 0x0047: 0xc7, # LATIN CAPITAL LETTER G - 0x0048: 0xc8, # LATIN CAPITAL LETTER H - 0x0049: 0xc9, # LATIN CAPITAL LETTER I - 0x004a: 0xd1, # LATIN CAPITAL LETTER J - 0x004b: 0xd2, # LATIN CAPITAL LETTER K - 0x004c: 0xd3, # LATIN CAPITAL LETTER L - 0x004d: 0xd4, # LATIN CAPITAL LETTER M - 0x004e: 0xd5, # LATIN CAPITAL LETTER N - 0x004f: 0xd6, # LATIN CAPITAL LETTER O - 0x0050: 0xd7, # LATIN CAPITAL LETTER P - 0x0051: 0xd8, # LATIN CAPITAL LETTER Q - 0x0052: 0xd9, # LATIN CAPITAL LETTER R - 0x0053: 0xe2, # LATIN CAPITAL LETTER S - 0x0054: 0xe3, # LATIN CAPITAL LETTER T - 0x0055: 0xe4, # LATIN CAPITAL LETTER U - 0x0056: 0xe5, # LATIN CAPITAL LETTER V - 0x0057: 0xe6, # LATIN CAPITAL LETTER W - 0x0058: 0xe7, # LATIN CAPITAL LETTER X - 0x0059: 0xe8, # LATIN CAPITAL LETTER Y - 
0x005a: 0xe9, # LATIN CAPITAL LETTER Z - 0x005b: 0xba, # LEFT SQUARE BRACKET - 0x005c: 0xe0, # REVERSE SOLIDUS - 0x005d: 0xbb, # RIGHT SQUARE BRACKET - 0x005e: 0xb0, # CIRCUMFLEX ACCENT - 0x005f: 0x6d, # LOW LINE + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL 
LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0xBA, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0xBB, # RIGHT SQUARE BRACKET + 0x005E: 0xB0, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE 0x0060: 0x79, # GRAVE ACCENT 0x0061: 0x81, # LATIN SMALL LETTER A 0x0062: 0x82, # LATIN SMALL LETTER B @@ -399,28 +399,28 @@ 0x0067: 0x87, # LATIN SMALL LETTER G 0x0068: 0x88, # LATIN SMALL LETTER H 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006a: 0x91, # LATIN SMALL LETTER J - 0x006b: 0x92, # LATIN SMALL LETTER K - 0x006c: 0x93, # LATIN SMALL LETTER L - 0x006d: 0x94, # LATIN SMALL LETTER M - 0x006e: 0x95, # LATIN SMALL LETTER N - 0x006f: 0x96, # LATIN SMALL LETTER O + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O 0x0070: 0x97, # LATIN SMALL LETTER P 0x0071: 0x98, # LATIN SMALL LETTER Q 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xa2, # LATIN SMALL LETTER S - 0x0074: 0xa3, # LATIN SMALL LETTER T - 0x0075: 0xa4, # LATIN SMALL LETTER U - 0x0076: 0xa5, # LATIN SMALL LETTER V - 0x0077: 0xa6, # LATIN SMALL LETTER W - 0x0078: 0xa7, # LATIN SMALL LETTER X - 0x0079: 0xa8, # LATIN SMALL LETTER Y - 0x007a: 0xa9, # LATIN SMALL LETTER Z - 0x007b: 0xc0, # LEFT CURLY BRACKET - 0x007c: 0x4f, # VERTICAL LINE - 0x007d: 0xd0, # RIGHT CURLY BRACKET - 0x007e: 0xa1, # TILDE - 0x007f: 0x07, # DELETE + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x4F, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE 0x0080: 0x20, # CONTROL 
0x0081: 0x21, # CONTROL 0x0082: 0x22, # CONTROL @@ -431,15 +431,15 @@ 0x0087: 0x17, # CONTROL 0x0088: 0x28, # CONTROL 0x0089: 0x29, # CONTROL - 0x008a: 0x2a, # CONTROL - 0x008b: 0x2b, # CONTROL - 0x008c: 0x2c, # CONTROL - 0x008d: 0x09, # CONTROL - 0x008e: 0x0a, # CONTROL - 0x008f: 0x1b, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL 0x0090: 0x30, # CONTROL 0x0091: 0x31, # CONTROL - 0x0092: 0x1a, # CONTROL + 0x0092: 0x1A, # CONTROL 0x0093: 0x33, # CONTROL 0x0094: 0x34, # CONTROL 0x0095: 0x35, # CONTROL @@ -447,106 +447,107 @@ 0x0097: 0x08, # CONTROL 0x0098: 0x38, # CONTROL 0x0099: 0x39, # CONTROL - 0x009a: 0x3a, # CONTROL - 0x009b: 0x3b, # CONTROL - 0x009c: 0x04, # CONTROL - 0x009d: 0x14, # CONTROL - 0x009e: 0x3e, # CONTROL - 0x009f: 0xff, # CONTROL - 0x00a0: 0x41, # NO-BREAK SPACE - 0x00a1: 0xaa, # INVERTED EXCLAMATION MARK - 0x00a2: 0x4a, # CENT SIGN - 0x00a3: 0xb1, # POUND SIGN - 0x00a5: 0xb2, # YEN SIGN - 0x00a6: 0x6a, # BROKEN BAR - 0x00a7: 0xb5, # SECTION SIGN - 0x00a8: 0xbd, # DIAERESIS - 0x00a9: 0xb4, # COPYRIGHT SIGN - 0x00aa: 0x9a, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x5f, # NOT SIGN - 0x00ad: 0xca, # SOFT HYPHEN - 0x00ae: 0xaf, # REGISTERED SIGN - 0x00af: 0xbc, # MACRON - 0x00b0: 0x90, # DEGREE SIGN - 0x00b1: 0x8f, # PLUS-MINUS SIGN - 0x00b2: 0xea, # SUPERSCRIPT TWO - 0x00b3: 0xfa, # SUPERSCRIPT THREE - 0x00b4: 0xbe, # ACUTE ACCENT - 0x00b5: 0xa0, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb3, # MIDDLE DOT - 0x00b8: 0x9d, # CEDILLA - 0x00b9: 0xda, # SUPERSCRIPT ONE - 0x00ba: 0x9b, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xb8, # VULGAR FRACTION ONE HALF - 0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xab, # INVERTED QUESTION MARK - 0x00c0: 0x64, # 
LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x9e, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xac, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00d1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xed, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xeb, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xef, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xec, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xbf, # MULTIPLICATION SIGN - 0x00d8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xfd, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xfe, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xfb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xfc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xad, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xae, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00df: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x47, # LATIN SMALL LETTER A WITH RING 
ABOVE - 0x00e6: 0x9c, # LATIN SMALL LIGATURE AE - 0x00e7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xe1, # DIVISION SIGN - 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x20ac: 0x9f, # EURO SIGN -} \ No newline at end of file + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0x4A, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0x5F, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 
0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN 
CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS + 
0x20AC: 0x9F, # EURO SIGN +} + Index: cp1250.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1250.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1250.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1250.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\ufffe' # 0x88 -> UNDEFINED u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8a -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u015a' # 0x8c -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u0164' # 0x8d -> LATIN CAPITAL LETTER T WITH CARON - u'\u017d' # 0x8e -> LATIN CAPITAL LETTER Z WITH CARON - u'\u0179' # 0x8f -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u015a' # 0x8C -> LATIN 
CAPITAL LETTER S WITH ACUTE + u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON + u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9a -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u015b' # 0x9c -> LATIN SMALL LETTER S WITH ACUTE - u'\u0165' # 0x9d -> LATIN SMALL LETTER T WITH CARON - u'\u017e' # 0x9e -> LATIN SMALL LETTER Z WITH CARON - u'\u017a' # 0x9f -> LATIN SMALL LETTER Z WITH ACUTE - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u02c7' # 0xa1 -> CARON - u'\u02d8' # 0xa2 -> BREVE - u'\u0141' # 0xa3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\u0104' # 0xa5 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u015e' # 0xaa -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\u017b' # 0xaf -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u02db' # 0xb2 -> OGONEK - u'\u0142' # 0xb3 -> LATIN SMALL LETTER L WITH STROKE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\u0105' # 0xb9 -> LATIN SMALL LETTER A WITH OGONEK - u'\u015f' # 0xba -> LATIN SMALL LETTER S WITH CEDILLA - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u013d' # 0xbc -> LATIN CAPITAL LETTER L WITH CARON - u'\u02dd' # 0xbd -> DOUBLE ACUTE ACCENT 
- u'\u013e' # 0xbe -> LATIN SMALL LETTER L WITH CARON - u'\u017c' # 0xbf -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xc0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xc3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xc5 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xc6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xc8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xca -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xcc -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xcf -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xd0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xd1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xd2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xd5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\u0158' # 0xd8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xd9 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xdb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xdd -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xde -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xe0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER 
A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xe3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xe5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xe6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xe8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xea -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xec -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xef -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xf0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xf1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0xf2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xf5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\u0159' # 0xf8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xf9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xfb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xfd -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0163' # 0xfe -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xff -> DOT ABOVE + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE + u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON + u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON + u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE + u'\xa0' # 0xA0 -> NO-BREAK SPACE + 
u'\u02c7' # 0xA1 -> CARON + u'\u02d8' # 0xA2 -> BREVE + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u02db' # 0xB2 -> OGONEK + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON + u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT + u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0xCC -> 
LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0xF0 -> LATIN SMALL 
LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 
+335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # 
LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,133 +415,134 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 
0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xdd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0xc3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xe3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xa5, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xb9, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xc6, # 
LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xe6, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0xc8, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xe8, # LATIN SMALL LETTER C WITH CARON - 0x010e: 0xcf, # LATIN CAPITAL LETTER D WITH CARON - 0x010f: 0xef, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0xd0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xf0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xca, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xea, # LATIN SMALL LETTER E WITH OGONEK - 0x011a: 0xcc, # LATIN CAPITAL LETTER E WITH CARON - 0x011b: 0xec, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0xc5, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013a: 0xe5, # LATIN SMALL LETTER L WITH ACUTE - 0x013d: 0xbc, # LATIN CAPITAL LETTER L WITH CARON - 0x013e: 0xbe, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xa3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xb3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xd1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xf1, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0xd2, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xf2, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0xd5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xf5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xc0, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xe0, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0xd8, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xf8, # LATIN SMALL LETTER R WITH CARON - 0x015a: 0x8c, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0x9c, # LATIN SMALL LETTER S WITH ACUTE - 0x015e: 0xaa, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0xba, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x8a, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9a, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0xde, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0xfe, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0x8d, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0x9d, # LATIN SMALL LETTER T WITH CARON - 0x016e: 0xd9, # LATIN CAPITAL LETTER U WITH RING 
ABOVE - 0x016f: 0xf9, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xdb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xfb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0x8f, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x9f, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0xaf, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0xbf, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0x8e, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x9e, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0xa1, # CARON - 0x02d8: 0xa2, # BREVE - 0x02d9: 0xff, # DOT ABOVE - 0x02db: 0xb2, # OGONEK - 0x02dd: 0xbd, # DOUBLE ACUTE ACCENT + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # 
LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0xA5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB9, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON + 0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON + 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE + 0x013D: 0xBC, # LATIN 
CAPITAL LETTER L WITH CARON + 0x013E: 0xBE, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON + 0x015A: 0x8C, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0x9C, # LATIN SMALL LETTER S WITH ACUTE + 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x8D, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x9D, # LATIN SMALL LETTER T WITH CARON + 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0x9F, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xA1, # CARON + 0x02D8: 0xA2, # BREVE + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0xB2, # OGONEK + 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp1251.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1251.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1251.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1251.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 
0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u20ac' # 0x88 -> EURO SIGN u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0409' # 0x8a -> CYRILLIC CAPITAL LETTER LJE - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u040a' # 0x8c -> CYRILLIC CAPITAL LETTER NJE - u'\u040c' # 0x8d -> CYRILLIC CAPITAL LETTER KJE - u'\u040b' # 0x8e -> CYRILLIC CAPITAL LETTER TSHE - u'\u040f' # 0x8f -> CYRILLIC CAPITAL LETTER DZHE + u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE + u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE + u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE + u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0459' # 0x9a -> CYRILLIC SMALL LETTER LJE - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 
u'\u045a' # 0x9c -> CYRILLIC SMALL LETTER NJE - u'\u045c' # 0x9d -> CYRILLIC SMALL LETTER KJE - u'\u045b' # 0x9e -> CYRILLIC SMALL LETTER TSHE - u'\u045f' # 0x9f -> CYRILLIC SMALL LETTER DZHE - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u040e' # 0xa1 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0xa2 -> CYRILLIC SMALL LETTER SHORT U - u'\u0408' # 0xa3 -> CYRILLIC CAPITAL LETTER JE - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\u0490' # 0xa5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\u0401' # 0xa8 -> CYRILLIC CAPITAL LETTER IO - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u0404' # 0xaa -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\u0407' # 0xaf -> CYRILLIC CAPITAL LETTER YI - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u0406' # 0xb2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0456' # 0xb3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0491' # 0xb4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\u0451' # 0xb8 -> CYRILLIC SMALL LETTER IO - u'\u2116' # 0xb9 -> NUMERO SIGN - u'\u0454' # 0xba -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0458' # 0xbc -> CYRILLIC SMALL LETTER JE - u'\u0405' # 0xbd -> CYRILLIC CAPITAL LETTER DZE - u'\u0455' # 0xbe -> CYRILLIC SMALL LETTER DZE - u'\u0457' # 0xbf -> CYRILLIC SMALL LETTER YI - u'\u0410' # 0xc0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xc1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xc2 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xc3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xc4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xc5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xc6 -> CYRILLIC CAPITAL LETTER ZHE - 
u'\u0417' # 0xc7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xc8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xc9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xca -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xcb -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xcc -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xcd -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xce -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xcf -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xd0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xd1 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xd2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xd3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xd4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xd5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xd6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xd7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xd8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xd9 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xda -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xdb -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xdc -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xdd -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xde -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xdf -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xe0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xe1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xe2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xe3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xe4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xe5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xe6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xe7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xe8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xe9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xea -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xeb -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xec -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xed -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xee -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xef -> CYRILLIC SMALL LETTER 
PE - u'\u0440' # 0xf0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xf1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xf2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xf3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xf4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xf5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xf6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xf7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xf8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xf9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xfa -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xfb -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xfc -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xfd -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xfe -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xff -> CYRILLIC SMALL LETTER YA + u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE + u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE + u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE + u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U + u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 
u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO + u'\u2116' # 0xB9 -> NUMERO SIGN + u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE + u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE + u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI + u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN + 
u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # 
LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN 
CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN 
SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,137 +415,138 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x0401: 0xa8, # CYRILLIC CAPITAL LETTER IO + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0xA8, # CYRILLIC CAPITAL LETTER IO 0x0402: 0x80, # CYRILLIC CAPITAL LETTER DJE 0x0403: 0x81, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xaa, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xbd, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xb2, # CYRILLIC CAPITAL LETTER 
BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xaf, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xa3, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0x8a, # CYRILLIC CAPITAL LETTER LJE - 0x040a: 0x8c, # CYRILLIC CAPITAL LETTER NJE - 0x040b: 0x8e, # CYRILLIC CAPITAL LETTER TSHE - 0x040c: 0x8d, # CYRILLIC CAPITAL LETTER KJE - 0x040e: 0xa1, # CYRILLIC CAPITAL LETTER SHORT U - 0x040f: 0x8f, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0xc0, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xc1, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xc2, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xc3, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xc4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xc5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xc6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xc7, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xc8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xc9, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0xca, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0xcb, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0xcc, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0xcd, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0xce, # CYRILLIC CAPITAL LETTER O - 0x041f: 0xcf, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xd0, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xd1, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xd2, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xd3, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xd4, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xd5, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xd6, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xd7, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xd8, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xd9, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0xda, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0xdb, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0xdc, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0xdd, # CYRILLIC CAPITAL LETTER E - 0x042e: 0xde, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0xdf, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xe0, # CYRILLIC SMALL LETTER A - 0x0431: 0xe1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xe2, # CYRILLIC SMALL LETTER VE - 0x0433: 
0xe3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xe4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xe5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xe6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xe7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xe8, # CYRILLIC SMALL LETTER I - 0x0439: 0xe9, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0xea, # CYRILLIC SMALL LETTER KA - 0x043b: 0xeb, # CYRILLIC SMALL LETTER EL - 0x043c: 0xec, # CYRILLIC SMALL LETTER EM - 0x043d: 0xed, # CYRILLIC SMALL LETTER EN - 0x043e: 0xee, # CYRILLIC SMALL LETTER O - 0x043f: 0xef, # CYRILLIC SMALL LETTER PE - 0x0440: 0xf0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xf1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xf2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xf3, # CYRILLIC SMALL LETTER U - 0x0444: 0xf4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xf5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xf6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xf7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xf8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xf9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0xfa, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0xfb, # CYRILLIC SMALL LETTER YERU - 0x044c: 0xfc, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0xfd, # CYRILLIC SMALL LETTER E - 0x044e: 0xfe, # CYRILLIC SMALL LETTER YU - 0x044f: 0xff, # CYRILLIC SMALL LETTER YA - 0x0451: 0xb8, # CYRILLIC SMALL LETTER IO + 0x0404: 0xAA, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0xBD, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0xB2, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xAF, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0xA3, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x8A, # CYRILLIC CAPITAL LETTER LJE + 0x040A: 0x8C, # CYRILLIC CAPITAL LETTER NJE + 0x040B: 0x8E, # CYRILLIC CAPITAL LETTER TSHE + 0x040C: 0x8D, # CYRILLIC CAPITAL LETTER KJE + 0x040E: 0xA1, # CYRILLIC CAPITAL LETTER SHORT U + 0x040F: 0x8F, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0xC0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xC1, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xC2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xC3, # 
CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xC4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xC5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xC6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xC7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xC8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xC9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xCA, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xCB, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xCC, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xCD, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0xCE, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xCF, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xD0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xD1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xD2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xD3, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xD4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xD5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xD6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xD7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xD8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xD9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xDA, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xDB, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xDC, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0xDD, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xDE, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0xDF, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xE0, # CYRILLIC SMALL LETTER A + 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE + 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE + 0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xE8, # CYRILLIC SMALL LETTER I + 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA + 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL + 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM + 0x043D: 0xED, # CYRILLIC SMALL LETTER EN + 0x043E: 0xEE, # CYRILLIC SMALL LETTER O + 0x043F: 0xEF, # CYRILLIC SMALL 
LETTER PE + 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER + 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES + 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE + 0x0443: 0xF3, # CYRILLIC SMALL LETTER U + 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF + 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA + 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xFD, # CYRILLIC SMALL LETTER E + 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU + 0x044F: 0xFF, # CYRILLIC SMALL LETTER YA + 0x0451: 0xB8, # CYRILLIC SMALL LETTER IO 0x0452: 0x90, # CYRILLIC SMALL LETTER DJE 0x0453: 0x83, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xba, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xbe, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xb3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xbf, # CYRILLIC SMALL LETTER YI - 0x0458: 0xbc, # CYRILLIC SMALL LETTER JE - 0x0459: 0x9a, # CYRILLIC SMALL LETTER LJE - 0x045a: 0x9c, # CYRILLIC SMALL LETTER NJE - 0x045b: 0x9e, # CYRILLIC SMALL LETTER TSHE - 0x045c: 0x9d, # CYRILLIC SMALL LETTER KJE - 0x045e: 0xa2, # CYRILLIC SMALL LETTER SHORT U - 0x045f: 0x9f, # CYRILLIC SMALL LETTER DZHE - 0x0490: 0xa5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x0491: 0xb4, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x0454: 0xBA, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0xBE, # CYRILLIC SMALL LETTER DZE + 0x0456: 0xB3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xBF, # CYRILLIC SMALL LETTER YI + 0x0458: 0xBC, # CYRILLIC SMALL LETTER JE + 0x0459: 0x9A, # CYRILLIC SMALL LETTER LJE + 0x045A: 0x9C, # CYRILLIC SMALL LETTER NJE + 0x045B: 0x9E, # CYRILLIC SMALL LETTER TSHE + 0x045C: 0x9D, # CYRILLIC SMALL LETTER KJE + 0x045E: 0xA2, # CYRILLIC SMALL LETTER SHORT U + 0x045F: 0x9F, # CYRILLIC SMALL LETTER DZHE + 
0x0490: 0xA5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0xB4, # CYRILLIC SMALL LETTER GHE WITH UPTURN 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ac: 0x88, # EURO SIGN - 0x2116: 0xb9, # NUMERO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x88, # EURO SIGN + 0x2116: 0xB9, # NUMERO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp1252.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1252.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1252.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1252.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 
0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8a -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8c -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8d -> UNDEFINED - u'\u017d' # 0x8e -> LATIN CAPITAL LETTER Z WITH CARON - u'\ufffe' # 0x8f -> UNDEFINED + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON + u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\u02dc' # 0x98 -> SMALL TILDE u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9a -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9c -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9d -> UNDEFINED - 
u'\u017e' # 0x9e -> LATIN SMALL LETTER Z WITH CARON - u'\u0178' # 0x9f -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\xa1' # 0xa1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xaa' # 0xaa -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xba' # 0xba -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xbf -> INVERTED QUESTION MARK - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN 
CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xd0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xdd -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xde -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xf0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - 
u'\xf2' # 0xf2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xfd -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xfe -> LATIN SMALL LETTER THORN - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 
0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN 
SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 
0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 
0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 
0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,133 +415,134 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a1: 0xa1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xaa, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00ba: 0xba, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xbf, # INVERTED QUESTION MARK - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH 
GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xd0, # LATIN CAPITAL LETTER ETH - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xdd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xde, # LATIN CAPITAL LETTER THORN - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN 
SMALL LETTER I WITH DIAERESIS - 0x00f0: 0xf0, # LATIN SMALL LETTER ETH - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0xfe, # LATIN SMALL LETTER THORN - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0152: 0x8c, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9c, # LATIN SMALL LIGATURE OE - 0x0160: 0x8a, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9a, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0x9f, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x017d: 0x8e, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x9e, # LATIN SMALL LETTER Z WITH CARON + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 
0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN 
CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE + 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x9F, # LATIN 
CAPITAL LETTER Y WITH DIAERESIS + 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02dc: 0x98, # SMALL TILDE + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp1253.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1253.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1253.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1253.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + 
u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\ufffe' # 0x88 -> UNDEFINED u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8a -> UNDEFINED - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8c -> UNDEFINED - u'\ufffe' # 0x8d -> UNDEFINED - u'\ufffe' # 0x8e -> UNDEFINED - u'\ufffe' # 0x8f -> UNDEFINED + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9a -> UNDEFINED - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9c -> UNDEFINED - u'\ufffe' # 0x9d -> UNDEFINED - u'\ufffe' # 0x9e -> UNDEFINED - u'\ufffe' # 0x9f -> UNDEFINED - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0385' # 0xa1 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xa2 -> GREEK CAPITAL LETTER ALPHA 
WITH TONOS - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\ufffe' # 0xaa -> UNDEFINED - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\u2015' # 0xaf -> HORIZONTAL BAR - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\u0384' # 0xb4 -> GREEK TONOS - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\u0388' # 0xb8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xb9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xba -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xbc -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xbe -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0xbf -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xc0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xc1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xc2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xc3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xc4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xc5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xc6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xc7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xc8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xc9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xca -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xcb -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xcc -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xcd -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xce -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xcf -> GREEK CAPITAL LETTER OMICRON 
- u'\u03a0' # 0xd0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xd1 -> GREEK CAPITAL LETTER RHO - u'\ufffe' # 0xd2 -> UNDEFINED - u'\u03a3' # 0xd3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xd4 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xd5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xd6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xd7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xd8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xd9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xda -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xdb -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 0xdc -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xdd -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xde -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xdf -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xe0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xe1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xe2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xe3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xe4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xe5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xe6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xe7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xe8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xe9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xea -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xeb -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xec -> GREEK SMALL LETTER MU - u'\u03bd' # 0xed -> GREEK SMALL LETTER NU - u'\u03be' # 0xee -> GREEK SMALL LETTER XI - u'\u03bf' # 0xef -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xf0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xf1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xf2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xf3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xf4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xf5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xf6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xf7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xf8 -> GREEK SMALL 
LETTER PSI - u'\u03c9' # 0xf9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xfa -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xfb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 0xfc -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xfd -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xfe -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\ufffe' # 0xff -> UNDEFINED + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\ufffe' # 0xAA -> UNDEFINED + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u2015' # 0xAF -> HORIZONTAL BAR + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u0384' # 0xB4 -> GREEK TONOS + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA 
WITH DIALYTIKA AND TONOS + u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU + u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU + u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI + u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' # 0xD2 -> UNDEFINED + u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0xE8 -> 
GREEK SMALL LETTER THETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU + u'\u03bd' # 0xED -> GREEK SMALL LETTER NU + u'\u03be' # 0xEE -> GREEK SMALL LETTER XI + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # 
RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN 
CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,121 +415,122 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, 
# YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x0384: 0xb4, # GREEK TONOS - 0x0385: 0xa1, # GREEK DIALYTIKA TONOS - 0x0386: 0xa2, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xb8, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xb9, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0xba, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0xbc, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0xbe, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0xbf, # 
GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xc0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xc1, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xc2, # GREEK CAPITAL LETTER BETA - 0x0393: 0xc3, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xc4, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xc5, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xc6, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xc7, # GREEK CAPITAL LETTER ETA - 0x0398: 0xc8, # GREEK CAPITAL LETTER THETA - 0x0399: 0xc9, # GREEK CAPITAL LETTER IOTA - 0x039a: 0xca, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0xcb, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0xcc, # GREEK CAPITAL LETTER MU - 0x039d: 0xcd, # GREEK CAPITAL LETTER NU - 0x039e: 0xce, # GREEK CAPITAL LETTER XI - 0x039f: 0xcf, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0xd0, # GREEK CAPITAL LETTER PI - 0x03a1: 0xd1, # GREEK CAPITAL LETTER RHO - 0x03a3: 0xd3, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0xd4, # GREEK CAPITAL LETTER TAU - 0x03a5: 0xd5, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0xd6, # GREEK CAPITAL LETTER PHI - 0x03a7: 0xd7, # GREEK CAPITAL LETTER CHI - 0x03a8: 0xd8, # GREEK CAPITAL LETTER PSI - 0x03a9: 0xd9, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0xda, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0xdb, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0xdc, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0xdd, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0xde, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0xdf, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b0: 0xe0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03b1: 0xe1, # GREEK SMALL LETTER ALPHA - 0x03b2: 0xe2, # GREEK SMALL LETTER BETA - 0x03b3: 0xe3, # GREEK SMALL LETTER GAMMA - 0x03b4: 0xe4, # GREEK SMALL LETTER DELTA - 0x03b5: 0xe5, # GREEK SMALL LETTER EPSILON - 0x03b6: 0xe6, # GREEK SMALL LETTER ZETA - 0x03b7: 0xe7, # GREEK SMALL LETTER ETA - 0x03b8: 0xe8, # GREEK SMALL LETTER THETA - 0x03b9: 0xe9, # GREEK SMALL LETTER IOTA - 0x03ba: 0xea, # GREEK SMALL LETTER KAPPA - 
0x03bb: 0xeb, # GREEK SMALL LETTER LAMDA - 0x03bc: 0xec, # GREEK SMALL LETTER MU - 0x03bd: 0xed, # GREEK SMALL LETTER NU - 0x03be: 0xee, # GREEK SMALL LETTER XI - 0x03bf: 0xef, # GREEK SMALL LETTER OMICRON - 0x03c0: 0xf0, # GREEK SMALL LETTER PI - 0x03c1: 0xf1, # GREEK SMALL LETTER RHO - 0x03c2: 0xf2, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0xf3, # GREEK SMALL LETTER SIGMA - 0x03c4: 0xf4, # GREEK SMALL LETTER TAU - 0x03c5: 0xf5, # GREEK SMALL LETTER UPSILON - 0x03c6: 0xf6, # GREEK SMALL LETTER PHI - 0x03c7: 0xf7, # GREEK SMALL LETTER CHI - 0x03c8: 0xf8, # GREEK SMALL LETTER PSI - 0x03c9: 0xf9, # GREEK SMALL LETTER OMEGA - 0x03ca: 0xfa, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0xfb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0xfc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0xfd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0xfe, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x0384: 0xB4, # GREEK TONOS + 0x0385: 0xA1, # GREEK DIALYTIKA TONOS + 0x0386: 0xA2, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA + 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA + 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA + 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA + 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0xCC, # GREEK CAPITAL LETTER MU + 0x039D: 0xCD, # 
GREEK CAPITAL LETTER NU + 0x039E: 0xCE, # GREEK CAPITAL LETTER XI + 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI + 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO + 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU + 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI + 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI + 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI + 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA + 0x03B2: 0xE2, # GREEK SMALL LETTER BETA + 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA + 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA + 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON + 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA + 0x03B7: 0xE7, # GREEK SMALL LETTER ETA + 0x03B8: 0xE8, # GREEK SMALL LETTER THETA + 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA + 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA + 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA + 0x03BC: 0xEC, # GREEK SMALL LETTER MU + 0x03BD: 0xED, # GREEK SMALL LETTER NU + 0x03BE: 0xEE, # GREEK SMALL LETTER XI + 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xF0, # GREEK SMALL LETTER PI + 0x03C1: 0xF1, # GREEK SMALL LETTER RHO + 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA + 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA + 0x03C4: 0xF4, # GREEK SMALL LETTER TAU + 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xF6, # GREEK SMALL LETTER PHI + 0x03C7: 0xF7, # GREEK SMALL LETTER CHI + 0x03C8: 0xF8, # GREEK SMALL LETTER PSI + 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA + 0x03CA: 0xFA, # GREEK 
SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH - 0x2015: 0xaf, # HORIZONTAL BAR + 0x2015: 0xAF, # HORIZONTAL BAR 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp1254.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1254.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1254.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1254.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> 
VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8a -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8c -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8d -> UNDEFINED - u'\ufffe' # 0x8e -> UNDEFINED - u'\ufffe' # 0x8f -> UNDEFINED + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\u02dc' # 0x98 -> SMALL TILDE u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9a -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9c -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9d -> UNDEFINED - u'\ufffe' # 0x9e -> UNDEFINED - u'\u0178' # 
0x9f -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\xa1' # 0xa1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xaa' # 0xaa -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xba' # 0xba -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xbf -> INVERTED QUESTION MARK - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN CAPITAL 
LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xd0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xdd -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xde -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xf0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xf2 -> LATIN 
SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xfd -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xfe -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE 
QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL 
LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE 
FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL 
LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL 
LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,131 +415,132 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a1: 0xa1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xaa, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00ba: 0xba, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xbf, # INVERTED QUESTION MARK - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN 
CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xf2, # LATIN SMALL LETTER O 
WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011e: 0xd0, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0xf0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xdd, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xfd, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0x8c, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9c, # LATIN SMALL LIGATURE OE - 0x015e: 0xde, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0xfe, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x8a, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9a, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0x9f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 
0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 
0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE + 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0x9F, # LATIN CAPITAL 
LETTER Y WITH DIAERESIS 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02dc: 0x98, # SMALL TILDE + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp1255.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1255.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1255.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1255.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + 
u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8a -> UNDEFINED - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8c -> UNDEFINED - u'\ufffe' # 0x8d -> UNDEFINED - u'\ufffe' # 0x8e -> UNDEFINED - u'\ufffe' # 0x8f -> UNDEFINED + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\u02dc' # 0x98 -> SMALL TILDE u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9a -> UNDEFINED - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9c -> UNDEFINED - u'\ufffe' # 0x9d -> UNDEFINED - u'\ufffe' # 0x9e -> UNDEFINED - u'\ufffe' # 0x9f -> UNDEFINED - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\xa1' # 0xa1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xa2 -> 
CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\u20aa' # 0xa4 -> NEW SHEQEL SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xd7' # 0xaa -> MULTIPLICATION SIGN - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xf7' # 0xba -> DIVISION SIGN - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xbf -> INVERTED QUESTION MARK - u'\u05b0' # 0xc0 -> HEBREW POINT SHEVA - u'\u05b1' # 0xc1 -> HEBREW POINT HATAF SEGOL - u'\u05b2' # 0xc2 -> HEBREW POINT HATAF PATAH - u'\u05b3' # 0xc3 -> HEBREW POINT HATAF QAMATS - u'\u05b4' # 0xc4 -> HEBREW POINT HIRIQ - u'\u05b5' # 0xc5 -> HEBREW POINT TSERE - u'\u05b6' # 0xc6 -> HEBREW POINT SEGOL - u'\u05b7' # 0xc7 -> HEBREW POINT PATAH - u'\u05b8' # 0xc8 -> HEBREW POINT QAMATS - u'\u05b9' # 0xc9 -> HEBREW POINT HOLAM - u'\ufffe' # 0xca -> UNDEFINED - u'\u05bb' # 0xcb -> HEBREW POINT QUBUTS - u'\u05bc' # 0xcc -> HEBREW POINT DAGESH OR MAPIQ - u'\u05bd' # 0xcd -> HEBREW POINT METEG - u'\u05be' # 0xce -> HEBREW PUNCTUATION MAQAF - u'\u05bf' # 0xcf -> HEBREW POINT RAFE - u'\u05c0' # 0xd0 -> HEBREW PUNCTUATION PASEQ - u'\u05c1' # 0xd1 -> HEBREW POINT SHIN DOT - u'\u05c2' # 0xd2 -> HEBREW POINT SIN DOT - u'\u05c3' # 0xd3 -> HEBREW PUNCTUATION SOF PASUQ - u'\u05f0' # 0xd4 -> HEBREW LIGATURE YIDDISH 
DOUBLE VAV - u'\u05f1' # 0xd5 -> HEBREW LIGATURE YIDDISH VAV YOD - u'\u05f2' # 0xd6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD - u'\u05f3' # 0xd7 -> HEBREW PUNCTUATION GERESH - u'\u05f4' # 0xd8 -> HEBREW PUNCTUATION GERSHAYIM - u'\ufffe' # 0xd9 -> UNDEFINED - u'\ufffe' # 0xda -> UNDEFINED - u'\ufffe' # 0xdb -> UNDEFINED - u'\ufffe' # 0xdc -> UNDEFINED - u'\ufffe' # 0xdd -> UNDEFINED - u'\ufffe' # 0xde -> UNDEFINED - u'\ufffe' # 0xdf -> UNDEFINED - u'\u05d0' # 0xe0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xe1 -> HEBREW LETTER BET - u'\u05d2' # 0xe2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xe3 -> HEBREW LETTER DALET - u'\u05d4' # 0xe4 -> HEBREW LETTER HE - u'\u05d5' # 0xe5 -> HEBREW LETTER VAV - u'\u05d6' # 0xe6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xe7 -> HEBREW LETTER HET - u'\u05d8' # 0xe8 -> HEBREW LETTER TET - u'\u05d9' # 0xe9 -> HEBREW LETTER YOD - u'\u05da' # 0xea -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xeb -> HEBREW LETTER KAF - u'\u05dc' # 0xec -> HEBREW LETTER LAMED - u'\u05dd' # 0xed -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xee -> HEBREW LETTER MEM - u'\u05df' # 0xef -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xf0 -> HEBREW LETTER NUN - u'\u05e1' # 0xf1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xf2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xf3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xf4 -> HEBREW LETTER PE - u'\u05e5' # 0xf5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xf6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xf7 -> HEBREW LETTER QOF - u'\u05e8' # 0xf8 -> HEBREW LETTER RESH - u'\u05e9' # 0xf9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xfa -> HEBREW LETTER TAV - u'\ufffe' # 0xfb -> UNDEFINED - u'\ufffe' # 0xfc -> UNDEFINED - u'\u200e' # 0xfd -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xfe -> RIGHT-TO-LEFT MARK - u'\ufffe' # 0xff -> UNDEFINED + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> 
NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xd7' # 0xAA -> MULTIPLICATION SIGN + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xf7' # 0xBA -> DIVISION SIGN + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA + u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL + u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH + u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS + u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ + u'\u05b5' # 0xC5 -> HEBREW POINT TSERE + u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL + u'\u05b7' # 0xC7 -> HEBREW POINT PATAH + u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS + u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM + u'\ufffe' # 0xCA -> UNDEFINED + u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS + u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ + u'\u05bd' # 0xCD -> HEBREW POINT METEG + u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF + u'\u05bf' # 0xCF -> HEBREW POINT RAFE + u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ + u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT + u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT + u'\u05c3' # 0xD3 
-> HEBREW PUNCTUATION SOF PASUQ + u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV + u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD + u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD + u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH + u'\u05f4' # 0xD8 -> HEBREW PUNCTUATION GERSHAYIM + u'\ufffe' # 0xD9 -> UNDEFINED + u'\ufffe' # 0xDA -> UNDEFINED + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\ufffe' # 0xDF -> UNDEFINED + u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF + u'\u05d1' # 0xE1 -> HEBREW LETTER BET + u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0xE3 -> HEBREW LETTER DALET + u'\u05d4' # 0xE4 -> HEBREW LETTER HE + u'\u05d5' # 0xE5 -> HEBREW LETTER VAV + u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0xE7 -> HEBREW LETTER HET + u'\u05d8' # 0xE8 -> HEBREW LETTER TET + u'\u05d9' # 0xE9 -> HEBREW LETTER YOD + u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF + u'\u05db' # 0xEB -> HEBREW LETTER KAF + u'\u05dc' # 0xEC -> HEBREW LETTER LAMED + u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM + u'\u05de' # 0xEE -> HEBREW LETTER MEM + u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0xF0 -> HEBREW LETTER NUN + u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN + u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0xF4 -> HEBREW LETTER PE + u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI + u'\u05e7' # 0xF7 -> HEBREW LETTER QOF + u'\u05e8' # 0xF8 -> HEBREW LETTER RESH + u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN + u'\u05ea' # 0xFA -> HEBREW LETTER TAV + u'\ufffe' # 0xFB -> UNDEFINED + u'\ufffe' # 0xFC -> UNDEFINED + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # 
VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 
0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN 
SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,115 +415,116 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a1: 0xa1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xbf, # INVERTED QUESTION MARK - 0x00d7: 0xaa, # MULTIPLICATION SIGN - 0x00f7: 0xba, # DIVISION SIGN + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xA5, 
# YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00D7: 0xAA, # MULTIPLICATION SIGN + 0x00F7: 0xBA, # DIVISION SIGN 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02dc: 0x98, # SMALL TILDE - 0x05b0: 0xc0, # HEBREW POINT SHEVA - 0x05b1: 0xc1, # HEBREW POINT HATAF SEGOL - 0x05b2: 0xc2, # HEBREW POINT HATAF PATAH - 0x05b3: 0xc3, # HEBREW POINT HATAF QAMATS - 0x05b4: 0xc4, # HEBREW POINT HIRIQ - 0x05b5: 0xc5, # HEBREW POINT TSERE - 0x05b6: 0xc6, # HEBREW POINT SEGOL - 0x05b7: 0xc7, # HEBREW POINT PATAH - 0x05b8: 0xc8, # HEBREW POINT QAMATS - 0x05b9: 0xc9, # HEBREW POINT HOLAM - 0x05bb: 0xcb, # HEBREW POINT QUBUTS - 0x05bc: 0xcc, # HEBREW POINT DAGESH OR MAPIQ - 0x05bd: 0xcd, # HEBREW POINT METEG - 0x05be: 0xce, # HEBREW PUNCTUATION MAQAF - 0x05bf: 0xcf, # HEBREW POINT RAFE - 0x05c0: 0xd0, # HEBREW PUNCTUATION PASEQ - 0x05c1: 0xd1, # HEBREW POINT SHIN DOT - 0x05c2: 0xd2, # HEBREW POINT SIN DOT - 0x05c3: 0xd3, # HEBREW PUNCTUATION SOF PASUQ - 0x05d0: 0xe0, # HEBREW LETTER ALEF - 0x05d1: 0xe1, # HEBREW LETTER BET - 0x05d2: 0xe2, # HEBREW LETTER GIMEL - 0x05d3: 0xe3, # HEBREW LETTER DALET - 0x05d4: 0xe4, # HEBREW 
LETTER HE - 0x05d5: 0xe5, # HEBREW LETTER VAV - 0x05d6: 0xe6, # HEBREW LETTER ZAYIN - 0x05d7: 0xe7, # HEBREW LETTER HET - 0x05d8: 0xe8, # HEBREW LETTER TET - 0x05d9: 0xe9, # HEBREW LETTER YOD - 0x05da: 0xea, # HEBREW LETTER FINAL KAF - 0x05db: 0xeb, # HEBREW LETTER KAF - 0x05dc: 0xec, # HEBREW LETTER LAMED - 0x05dd: 0xed, # HEBREW LETTER FINAL MEM - 0x05de: 0xee, # HEBREW LETTER MEM - 0x05df: 0xef, # HEBREW LETTER FINAL NUN - 0x05e0: 0xf0, # HEBREW LETTER NUN - 0x05e1: 0xf1, # HEBREW LETTER SAMEKH - 0x05e2: 0xf2, # HEBREW LETTER AYIN - 0x05e3: 0xf3, # HEBREW LETTER FINAL PE - 0x05e4: 0xf4, # HEBREW LETTER PE - 0x05e5: 0xf5, # HEBREW LETTER FINAL TSADI - 0x05e6: 0xf6, # HEBREW LETTER TSADI - 0x05e7: 0xf7, # HEBREW LETTER QOF - 0x05e8: 0xf8, # HEBREW LETTER RESH - 0x05e9: 0xf9, # HEBREW LETTER SHIN - 0x05ea: 0xfa, # HEBREW LETTER TAV - 0x05f0: 0xd4, # HEBREW LIGATURE YIDDISH DOUBLE VAV - 0x05f1: 0xd5, # HEBREW LIGATURE YIDDISH VAV YOD - 0x05f2: 0xd6, # HEBREW LIGATURE YIDDISH DOUBLE YOD - 0x05f3: 0xd7, # HEBREW PUNCTUATION GERESH - 0x05f4: 0xd8, # HEBREW PUNCTUATION GERSHAYIM - 0x200e: 0xfd, # LEFT-TO-RIGHT MARK - 0x200f: 0xfe, # RIGHT-TO-LEFT MARK + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE + 0x05B0: 0xC0, # HEBREW POINT SHEVA + 0x05B1: 0xC1, # HEBREW POINT HATAF SEGOL + 0x05B2: 0xC2, # HEBREW POINT HATAF PATAH + 0x05B3: 0xC3, # HEBREW POINT HATAF QAMATS + 0x05B4: 0xC4, # HEBREW POINT HIRIQ + 0x05B5: 0xC5, # HEBREW POINT TSERE + 0x05B6: 0xC6, # HEBREW POINT SEGOL + 0x05B7: 0xC7, # HEBREW POINT PATAH + 0x05B8: 0xC8, # HEBREW POINT QAMATS + 0x05B9: 0xC9, # HEBREW POINT HOLAM + 0x05BB: 0xCB, # HEBREW POINT QUBUTS + 0x05BC: 0xCC, # HEBREW POINT DAGESH OR MAPIQ + 0x05BD: 0xCD, # HEBREW POINT METEG + 0x05BE: 0xCE, # HEBREW PUNCTUATION MAQAF + 0x05BF: 0xCF, # HEBREW POINT RAFE + 0x05C0: 0xD0, # HEBREW PUNCTUATION PASEQ + 0x05C1: 0xD1, # HEBREW POINT SHIN DOT + 0x05C2: 0xD2, # HEBREW POINT SIN DOT + 0x05C3: 0xD3, # HEBREW PUNCTUATION 
SOF PASUQ + 0x05D0: 0xE0, # HEBREW LETTER ALEF + 0x05D1: 0xE1, # HEBREW LETTER BET + 0x05D2: 0xE2, # HEBREW LETTER GIMEL + 0x05D3: 0xE3, # HEBREW LETTER DALET + 0x05D4: 0xE4, # HEBREW LETTER HE + 0x05D5: 0xE5, # HEBREW LETTER VAV + 0x05D6: 0xE6, # HEBREW LETTER ZAYIN + 0x05D7: 0xE7, # HEBREW LETTER HET + 0x05D8: 0xE8, # HEBREW LETTER TET + 0x05D9: 0xE9, # HEBREW LETTER YOD + 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF + 0x05DB: 0xEB, # HEBREW LETTER KAF + 0x05DC: 0xEC, # HEBREW LETTER LAMED + 0x05DD: 0xED, # HEBREW LETTER FINAL MEM + 0x05DE: 0xEE, # HEBREW LETTER MEM + 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN + 0x05E0: 0xF0, # HEBREW LETTER NUN + 0x05E1: 0xF1, # HEBREW LETTER SAMEKH + 0x05E2: 0xF2, # HEBREW LETTER AYIN + 0x05E3: 0xF3, # HEBREW LETTER FINAL PE + 0x05E4: 0xF4, # HEBREW LETTER PE + 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI + 0x05E6: 0xF6, # HEBREW LETTER TSADI + 0x05E7: 0xF7, # HEBREW LETTER QOF + 0x05E8: 0xF8, # HEBREW LETTER RESH + 0x05E9: 0xF9, # HEBREW LETTER SHIN + 0x05EA: 0xFA, # HEBREW LETTER TAV + 0x05F0: 0xD4, # HEBREW LIGATURE YIDDISH DOUBLE VAV + 0x05F1: 0xD5, # HEBREW LIGATURE YIDDISH VAV YOD + 0x05F2: 0xD6, # HEBREW LIGATURE YIDDISH DOUBLE YOD + 0x05F3: 0xD7, # HEBREW PUNCTUATION GERESH + 0x05F4: 0xD8, # HEBREW PUNCTUATION GERSHAYIM + 0x200E: 0xFD, # LEFT-TO-RIGHT MARK + 0x200F: 0xFE, # RIGHT-TO-LEFT MARK 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 
0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20aa: 0xa4, # NEW SHEQEL SIGN - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AA: 0xA4, # NEW SHEQEL SIGN + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp1256.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1256.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1256.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1256.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' 
# 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a 
-> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\u067e' # 0x81 -> ARABIC LETTER PEH u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0679' # 0x8a -> 
ARABIC LETTER TTEH - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8c -> LATIN CAPITAL LIGATURE OE - u'\u0686' # 0x8d -> ARABIC LETTER TCHEH - u'\u0698' # 0x8e -> ARABIC LETTER JEH - u'\u0688' # 0x8f -> ARABIC LETTER DDAL + u'\u0679' # 0x8A -> ARABIC LETTER TTEH + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\u0686' # 0x8D -> ARABIC LETTER TCHEH + u'\u0698' # 0x8E -> ARABIC LETTER JEH + u'\u0688' # 0x8F -> ARABIC LETTER DDAL u'\u06af' # 0x90 -> ARABIC LETTER GAF u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0691' # 0x9a -> ARABIC LETTER RREH - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9c -> LATIN SMALL LIGATURE OE - u'\u200c' # 0x9d -> ZERO WIDTH NON-JOINER - u'\u200d' # 0x9e -> ZERO WIDTH JOINER - u'\u06ba' # 0x9f -> ARABIC LETTER NOON GHUNNA - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u060c' # 0xa1 -> ARABIC COMMA - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u06be' # 0xaa -> ARABIC LETTER HEH DOACHASHMEE - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\u061b' # 0xba -> ARABIC 
SEMICOLON - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\u061f' # 0xbf -> ARABIC QUESTION MARK - u'\u06c1' # 0xc0 -> ARABIC LETTER HEH GOAL - u'\u0621' # 0xc1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xc2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xc3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xc4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xc5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xc6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xc7 -> ARABIC LETTER ALEF - u'\u0628' # 0xc8 -> ARABIC LETTER BEH - u'\u0629' # 0xc9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xca -> ARABIC LETTER TEH - u'\u062b' # 0xcb -> ARABIC LETTER THEH - u'\u062c' # 0xcc -> ARABIC LETTER JEEM - u'\u062d' # 0xcd -> ARABIC LETTER HAH - u'\u062e' # 0xce -> ARABIC LETTER KHAH - u'\u062f' # 0xcf -> ARABIC LETTER DAL - u'\u0630' # 0xd0 -> ARABIC LETTER THAL - u'\u0631' # 0xd1 -> ARABIC LETTER REH - u'\u0632' # 0xd2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xd3 -> ARABIC LETTER SEEN - u'\u0634' # 0xd4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xd5 -> ARABIC LETTER SAD - u'\u0636' # 0xd6 -> ARABIC LETTER DAD - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\u0637' # 0xd8 -> ARABIC LETTER TAH - u'\u0638' # 0xd9 -> ARABIC LETTER ZAH - u'\u0639' # 0xda -> ARABIC LETTER AIN - u'\u063a' # 0xdb -> ARABIC LETTER GHAIN - u'\u0640' # 0xdc -> ARABIC TATWEEL - u'\u0641' # 0xdd -> ARABIC LETTER FEH - u'\u0642' # 0xde -> ARABIC LETTER QAF - u'\u0643' # 0xdf -> ARABIC LETTER KAF - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\u0644' # 0xe1 -> ARABIC LETTER LAM - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0645' # 0xe3 -> ARABIC LETTER MEEM - u'\u0646' # 0xe4 -> ARABIC LETTER NOON - u'\u0647' # 0xe5 -> ARABIC LETTER HEH - u'\u0648' # 0xe6 -> ARABIC LETTER WAW - u'\xe7' # 0xe7 -> LATIN SMALL 
LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0649' # 0xec -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xed -> ARABIC LETTER YEH - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u064b' # 0xf0 -> ARABIC FATHATAN - u'\u064c' # 0xf1 -> ARABIC DAMMATAN - u'\u064d' # 0xf2 -> ARABIC KASRATAN - u'\u064e' # 0xf3 -> ARABIC FATHA - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u064f' # 0xf5 -> ARABIC DAMMA - u'\u0650' # 0xf6 -> ARABIC KASRA - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\u0651' # 0xf8 -> ARABIC SHADDA - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0652' # 0xfa -> ARABIC SUKUN - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u200e' # 0xfd -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xfe -> RIGHT-TO-LEFT MARK - u'\u06d2' # 0xff -> ARABIC LETTER YEH BARREE + u'\u0691' # 0x9A -> ARABIC LETTER RREH + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER + u'\u200d' # 0x9E -> ZERO WIDTH JOINER + u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u060c' # 0xA1 -> ARABIC COMMA + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + 
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u061b' # 0xBA -> ARABIC SEMICOLON + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0637' # 0xD8 -> ARABIC LETTER TAH + u'\u0638' # 0xD9 -> ARABIC LETTER ZAH + u'\u0639' # 0xDA -> ARABIC LETTER AIN + u'\u063a' # 0xDB -> ARABIC LETTER GHAIN + u'\u0640' # 0xDC -> ARABIC TATWEEL + u'\u0641' # 0xDD -> ARABIC LETTER FEH + u'\u0642' # 0xDE -> ARABIC LETTER QAF + u'\u0643' # 0xDF -> ARABIC LETTER KAF + u'\xe0' # 0xE0 -> 
LATIN SMALL LETTER A WITH GRAVE + u'\u0644' # 0xE1 -> ARABIC LETTER LAM + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0645' # 0xE3 -> ARABIC LETTER MEEM + u'\u0646' # 0xE4 -> ARABIC LETTER NOON + u'\u0647' # 0xE5 -> ARABIC LETTER HEH + u'\u0648' # 0xE6 -> ARABIC LETTER WAW + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xED -> ARABIC LETTER YEH + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u064b' # 0xF0 -> ARABIC FATHATAN + u'\u064c' # 0xF1 -> ARABIC DAMMATAN + u'\u064d' # 0xF2 -> ARABIC KASRATAN + u'\u064e' # 0xF3 -> ARABIC FATHA + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u064f' # 0xF5 -> ARABIC DAMMA + u'\u0650' # 0xF6 -> ARABIC KASRA + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0651' # 0xF8 -> ARABIC SHADDA + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0652' # 0xFA -> ARABIC SUKUN + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 
0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # 
LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 
0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0152: 0x8c, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9c, # LATIN SMALL LIGATURE OE + 0x007A: 
0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE 0x0192: 
0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x060c: 0xa1, # ARABIC COMMA - 0x061b: 0xba, # ARABIC SEMICOLON - 0x061f: 0xbf, # ARABIC QUESTION MARK - 0x0621: 0xc1, # ARABIC LETTER HAMZA - 0x0622: 0xc2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xc3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xc4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xc5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xc6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xc7, # ARABIC LETTER ALEF - 0x0628: 0xc8, # ARABIC LETTER BEH - 0x0629: 0xc9, # ARABIC LETTER TEH MARBUTA - 0x062a: 0xca, # ARABIC LETTER TEH - 0x062b: 0xcb, # ARABIC LETTER THEH - 0x062c: 0xcc, # ARABIC LETTER JEEM - 0x062d: 0xcd, # ARABIC LETTER HAH - 0x062e: 0xce, # ARABIC LETTER KHAH - 0x062f: 0xcf, # ARABIC LETTER DAL - 0x0630: 0xd0, # ARABIC LETTER THAL - 0x0631: 0xd1, # ARABIC LETTER REH - 0x0632: 0xd2, # ARABIC LETTER ZAIN - 0x0633: 0xd3, # ARABIC LETTER SEEN - 0x0634: 0xd4, # ARABIC LETTER SHEEN - 0x0635: 0xd5, # ARABIC LETTER SAD - 0x0636: 0xd6, # ARABIC LETTER DAD - 0x0637: 0xd8, # ARABIC LETTER TAH - 0x0638: 0xd9, # ARABIC LETTER ZAH - 0x0639: 0xda, # ARABIC LETTER AIN - 0x063a: 0xdb, # ARABIC LETTER GHAIN - 0x0640: 0xdc, # ARABIC TATWEEL - 0x0641: 0xdd, # ARABIC LETTER FEH - 0x0642: 0xde, # ARABIC LETTER QAF - 0x0643: 0xdf, # ARABIC LETTER KAF - 0x0644: 0xe1, # ARABIC LETTER LAM - 0x0645: 0xe3, # ARABIC LETTER MEEM - 0x0646: 0xe4, # ARABIC LETTER NOON - 0x0647: 0xe5, # ARABIC LETTER HEH - 0x0648: 0xe6, # ARABIC LETTER WAW - 0x0649: 0xec, # ARABIC LETTER ALEF MAKSURA - 0x064a: 0xed, # ARABIC LETTER YEH - 0x064b: 0xf0, # ARABIC FATHATAN - 0x064c: 0xf1, # ARABIC DAMMATAN - 0x064d: 0xf2, # ARABIC KASRATAN - 0x064e: 0xf3, # ARABIC FATHA - 0x064f: 0xf5, # ARABIC DAMMA - 0x0650: 0xf6, # ARABIC KASRA - 0x0651: 0xf8, # ARABIC SHADDA - 0x0652: 0xfa, # ARABIC SUKUN - 0x0679: 0x8a, # ARABIC LETTER TTEH - 0x067e: 0x81, # ARABIC LETTER PEH - 0x0686: 0x8d, # 
ARABIC LETTER TCHEH - 0x0688: 0x8f, # ARABIC LETTER DDAL - 0x0691: 0x9a, # ARABIC LETTER RREH - 0x0698: 0x8e, # ARABIC LETTER JEH - 0x06a9: 0x98, # ARABIC LETTER KEHEH - 0x06af: 0x90, # ARABIC LETTER GAF - 0x06ba: 0x9f, # ARABIC LETTER NOON GHUNNA - 0x06be: 0xaa, # ARABIC LETTER HEH DOACHASHMEE - 0x06c1: 0xc0, # ARABIC LETTER HEH GOAL - 0x06d2: 0xff, # ARABIC LETTER YEH BARREE - 0x200c: 0x9d, # ZERO WIDTH NON-JOINER - 0x200d: 0x9e, # ZERO WIDTH JOINER - 0x200e: 0xfd, # LEFT-TO-RIGHT MARK - 0x200f: 0xfe, # RIGHT-TO-LEFT MARK + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x060C: 0xA1, # ARABIC COMMA + 0x061B: 0xBA, # ARABIC SEMICOLON + 0x061F: 0xBF, # ARABIC QUESTION MARK + 0x0621: 0xC1, # ARABIC LETTER HAMZA + 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0xC7, # ARABIC LETTER ALEF + 0x0628: 0xC8, # ARABIC LETTER BEH + 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA + 0x062A: 0xCA, # ARABIC LETTER TEH + 0x062B: 0xCB, # ARABIC LETTER THEH + 0x062C: 0xCC, # ARABIC LETTER JEEM + 0x062D: 0xCD, # ARABIC LETTER HAH + 0x062E: 0xCE, # ARABIC LETTER KHAH + 0x062F: 0xCF, # ARABIC LETTER DAL + 0x0630: 0xD0, # ARABIC LETTER THAL + 0x0631: 0xD1, # ARABIC LETTER REH + 0x0632: 0xD2, # ARABIC LETTER ZAIN + 0x0633: 0xD3, # ARABIC LETTER SEEN + 0x0634: 0xD4, # ARABIC LETTER SHEEN + 0x0635: 0xD5, # ARABIC LETTER SAD + 0x0636: 0xD6, # ARABIC LETTER DAD + 0x0637: 0xD8, # ARABIC LETTER TAH + 0x0638: 0xD9, # ARABIC LETTER ZAH + 0x0639: 0xDA, # ARABIC LETTER AIN + 0x063A: 0xDB, # ARABIC LETTER GHAIN + 0x0640: 0xDC, # ARABIC TATWEEL + 0x0641: 0xDD, # ARABIC LETTER FEH + 0x0642: 0xDE, # ARABIC LETTER QAF + 0x0643: 0xDF, # ARABIC LETTER KAF + 0x0644: 0xE1, # ARABIC LETTER LAM + 0x0645: 0xE3, # ARABIC LETTER MEEM + 0x0646: 0xE4, # ARABIC LETTER NOON + 0x0647: 0xE5, 
# ARABIC LETTER HEH + 0x0648: 0xE6, # ARABIC LETTER WAW + 0x0649: 0xEC, # ARABIC LETTER ALEF MAKSURA + 0x064A: 0xED, # ARABIC LETTER YEH + 0x064B: 0xF0, # ARABIC FATHATAN + 0x064C: 0xF1, # ARABIC DAMMATAN + 0x064D: 0xF2, # ARABIC KASRATAN + 0x064E: 0xF3, # ARABIC FATHA + 0x064F: 0xF5, # ARABIC DAMMA + 0x0650: 0xF6, # ARABIC KASRA + 0x0651: 0xF8, # ARABIC SHADDA + 0x0652: 0xFA, # ARABIC SUKUN + 0x0679: 0x8A, # ARABIC LETTER TTEH + 0x067E: 0x81, # ARABIC LETTER PEH + 0x0686: 0x8D, # ARABIC LETTER TCHEH + 0x0688: 0x8F, # ARABIC LETTER DDAL + 0x0691: 0x9A, # ARABIC LETTER RREH + 0x0698: 0x8E, # ARABIC LETTER JEH + 0x06A9: 0x98, # ARABIC LETTER KEHEH + 0x06AF: 0x90, # ARABIC LETTER GAF + 0x06BA: 0x9F, # ARABIC LETTER NOON GHUNNA + 0x06BE: 0xAA, # ARABIC LETTER HEH DOACHASHMEE + 0x06C1: 0xC0, # ARABIC LETTER HEH GOAL + 0x06D2: 0xFF, # ARABIC LETTER YEH BARREE + 0x200C: 0x9D, # ZERO WIDTH NON-JOINER + 0x200D: 0x9E, # ZERO WIDTH JOINER + 0x200E: 0xFD, # LEFT-TO-RIGHT MARK + 0x200F: 0xFE, # RIGHT-TO-LEFT MARK 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No 
newline at end of file +} + Index: cp1257.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1257.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1257.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1257.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\ufffe' # 0x88 -> UNDEFINED u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8a -> UNDEFINED - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8c -> UNDEFINED - u'\xa8' # 0x8d -> DIAERESIS - u'\u02c7' # 0x8e -> CARON - u'\xb8' # 0x8f -> CEDILLA + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\xa8' # 0x8D -> DIAERESIS + u'\u02c7' # 0x8E -> CARON + u'\xb8' # 0x8F -> CEDILLA u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE 
QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9a -> UNDEFINED - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9c -> UNDEFINED - u'\xaf' # 0x9d -> MACRON - u'\u02db' # 0x9e -> OGONEK - u'\ufffe' # 0x9f -> UNDEFINED - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\ufffe' # 0xa1 -> UNDEFINED - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\ufffe' # 0xa5 -> UNDEFINED - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xd8' # 0xa8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u0156' # 0xaa -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xc6' # 0xaf -> LATIN CAPITAL LETTER AE - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xf8' # 0xb8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\u0157' # 0xba -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xbf -> LATIN SMALL LETTER AE - u'\u0104' # 0xc0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xc1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xc2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xc3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A 
WITH RING ABOVE - u'\u0118' # 0xc6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xc7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xc8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xca -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xcb -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xcc -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xcd -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xce -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xcf -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xd0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xd1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xd2 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\u014c' # 0xd4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\u0172' # 0xd8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xd9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xda -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xdb -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xdd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xde -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\u0105' # 0xe0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xe1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xe2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xe3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xe6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xe7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xe8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xe9 -> LATIN SMALL 
LETTER E WITH ACUTE - u'\u017a' # 0xea -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xeb -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xec -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xed -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xee -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xef -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xf0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xf1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xf2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xf4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\u0173' # 0xf8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xf9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xfa -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xfb -> LATIN SMALL LETTER U WITH MACRON - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xfd -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xfe -> LATIN SMALL LETTER Z WITH CARON - u'\u02d9' # 0xff -> DOT ABOVE + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\xaf' # 0x9D -> MACRON + u'\u02db' # 0x9E -> OGONEK + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\ufffe' # 0xA1 -> UNDEFINED + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\ufffe' # 0xA5 -> UNDEFINED + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xc6' # 0xAF -> LATIN 
CAPITAL LETTER AE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0xBF -> LATIN SMALL LETTER AE + u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' 
# 0xD7 -> MULTIPLICATION SIGN + u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON + 
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT 
NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 
0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,126 +415,127 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0x8d, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0x9d, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0x8f, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 
0x00c6: 0xaf, # LATIN CAPITAL LETTER AE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xa8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xbf, # LATIN SMALL LETTER AE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xb8, # LATIN SMALL LETTER O WITH STROKE - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xc2, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xe2, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xc0, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xe0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xc3, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xe3, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0xc8, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xe8, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0xc7, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xe7, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xcb, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xeb, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xc6, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xe6, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xcc, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xec, # LATIN SMALL LETTER G WITH CEDILLA - 0x012a: 0xce, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0xee, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0xc1, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0xe1, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xcd, # 
LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xed, # LATIN SMALL LETTER K WITH CEDILLA - 0x013b: 0xcf, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0xef, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0xd9, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xf9, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xd1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xf1, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xd2, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xf2, # LATIN SMALL LETTER N WITH CEDILLA - 0x014c: 0xd4, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0xf4, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xaa, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xba, # LATIN SMALL LETTER R WITH CEDILLA - 0x015a: 0xda, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0xfa, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xd0, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xf0, # LATIN SMALL LETTER S WITH CARON - 0x016a: 0xdb, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0xfb, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xd8, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xf8, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0xca, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0xea, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0xdd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0xfd, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0xde, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xfe, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0x8e, # CARON - 0x02d9: 0xff, # DOT ABOVE - 0x02db: 0x9e, # OGONEK + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0x8D, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0x9D, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0x8F, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBF, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE3, # LATIN SMALL LETTER C 
WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA + 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA + 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA + 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON + 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE + 
0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0x8E, # CARON + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0x9E, # OGONEK 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp1258.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp1258.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp1258.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp1258.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> 
FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK @@ -170,12 +170,12 @@ u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8a -> UNDEFINED - u'\u2039' # 0x8b -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8c -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8d -> UNDEFINED - u'\ufffe' # 0x8e -> UNDEFINED - u'\ufffe' # 0x8f -> UNDEFINED + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\u02dc' # 0x98 -> SMALL TILDE u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9a -> UNDEFINED - u'\u203a' # 0x9b -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9c -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9d -> UNDEFINED - u'\ufffe' # 0x9e -> UNDEFINED - u'\u0178' # 0x9f -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xa0 -> 
NO-BREAK SPACE - u'\xa1' # 0xa1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xaa' # 0xaa -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xba' # 0xba -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xbf -> INVERTED QUESTION MARK - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xc3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0300' # 0xcc -> COMBINING GRAVE ACCENT - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH 
CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xd0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u0309' # 0xd2 -> COMBINING HOOK ABOVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u01a0' # 0xd5 -> LATIN CAPITAL LETTER O WITH HORN - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u01af' # 0xdd -> LATIN CAPITAL LETTER U WITH HORN - u'\u0303' # 0xde -> COMBINING TILDE - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xe3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0301' # 0xec -> COMBINING ACUTE ACCENT - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xf0 -> LATIN SMALL LETTER D WITH STROKE - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - u'\u0323' # 0xf2 -> COMBINING DOT BELOW - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER 
O WITH CIRCUMFLEX - u'\u01a1' # 0xf5 -> LATIN SMALL LETTER O WITH HORN - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u01b0' # 0xfd -> LATIN SMALL LETTER U WITH HORN - u'\u20ab' # 0xfe -> DONG SIGN - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + 
u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN + u'\u0303' # 0xDE -> COMBINING TILDE + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> 
LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\u0323' # 0xF2 -> COMBINING DOT BELOW + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN + u'\u20ab' # 0xFE -> DONG SIGN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 
0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN 
CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,129 +415,130 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # 
LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a1: 0xa1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xaa, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00ba: 0xba, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xbf, # INVERTED QUESTION MARK - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH 
CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xc3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xe3, # LATIN SMALL 
LETTER A WITH BREVE - 0x0110: 0xd0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xf0, # LATIN SMALL LETTER D WITH STROKE - 0x0152: 0x8c, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9c, # LATIN SMALL LIGATURE OE - 0x0178: 0x9f, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL 
LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 
0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE + 0x0153: 0x9C, # LATIN SMALL LIGATURE OE + 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x01a0: 0xd5, # LATIN CAPITAL LETTER O WITH HORN - 0x01a1: 0xf5, # LATIN SMALL LETTER O WITH HORN - 0x01af: 0xdd, # LATIN CAPITAL LETTER U WITH HORN - 0x01b0: 0xfd, # LATIN SMALL LETTER U WITH HORN - 0x02c6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02dc: 0x98, # SMALL TILDE - 0x0300: 0xcc, # COMBINING GRAVE ACCENT - 0x0301: 0xec, # COMBINING ACUTE ACCENT - 0x0303: 0xde, # COMBINING TILDE - 0x0309: 0xd2, # COMBINING HOOK ABOVE - 0x0323: 0xf2, # COMBINING DOT BELOW + 0x01A0: 0xD5, # LATIN CAPITAL LETTER O WITH HORN + 0x01A1: 0xF5, # LATIN SMALL LETTER O WITH HORN + 0x01AF: 0xDD, # LATIN CAPITAL LETTER U WITH HORN + 0x01B0: 0xFD, # LATIN SMALL LETTER U WITH HORN + 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02DC: 0x98, # SMALL TILDE + 0x0300: 0xCC, # COMBINING GRAVE ACCENT + 0x0301: 0xEC, # COMBINING ACUTE ACCENT + 0x0303: 0xDE, # COMBINING TILDE + 0x0309: 0xD2, # COMBINING HOOK ABOVE + 0x0323: 0xF2, # COMBINING DOT BELOW 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x84, # DOUBLE LOW-9 QUOTATION MARK + 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION 
MARK + 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK 0x2020: 0x86, # DAGGER 0x2021: 0x87, # DOUBLE DAGGER 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8b, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0x9b, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20ab: 0xfe, # DONG SIGN - 0x20ac: 0x80, # EURO SIGN + 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x20AB: 0xFE, # DONG SIGN + 0x20AC: 0x80, # EURO SIGN 0x2122: 0x99, # TRADE MARK SIGN -} \ No newline at end of file +} + Index: cp424.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp424.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp424.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp424.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x7f' # 0x07 -> DELETE u'\x97' # 0x08 -> GRAPHIC ESCAPE u'\x8d' # 0x09 -> SUPERSCRIPT - u'\x8e' # 0x0a -> REPEAT - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\x8e' # 0x0A -> REPEAT + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1a -> UNIT BACK SPACE - u'\x8f' # 0x1b -> CUSTOMER USE ONE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x92' # 0x1A -> UNIT BACK SPACE + u'\x8f' # 0x1B -> CUSTOMER USE ONE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP 
SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> DIGIT SELECT u'\x81' # 0x21 -> START OF SIGNIFICANCE u'\x82' # 0x22 -> FIELD SEPARATOR @@ -74,12 +74,12 @@ u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> SET ATTRIBUTE u'\x89' # 0x29 -> START FIELD EXTENDED - u'\x8a' # 0x2a -> SET MODE OR SWITCH - u'\x8b' # 0x2b -> CONTROL SEQUENCE PREFIX - u'\x8c' # 0x2c -> MODIFY FIELD ATTRIBUTE - u'\x05' # 0x2d -> ENQUIRY - u'\x06' # 0x2e -> ACKNOWLEDGE - u'\x07' # 0x2f -> BELL + u'\x8a' # 0x2A -> SET MODE OR SWITCH + u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX + u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> u'\x91' # 0x31 -> u'\x16' # 0x32 -> SYNCHRONOUS IDLE @@ -90,12 +90,12 @@ u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> SUBSCRIPT u'\x99' # 0x39 -> INDENT TABULATION - u'\x9a' # 0x3a -> REVERSE FORM FEED - u'\x9b' # 0x3b -> CUSTOMER USE THREE - u'\x14' # 0x3c -> DEVICE CONTROL FOUR - u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3e -> - u'\x1a' # 0x3f -> SUBSTITUTE + u'\x9a' # 0x3A -> REVERSE FORM FEED + u'\x9b' # 0x3B -> CUSTOMER USE THREE + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> + u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\u05d0' # 0x41 -> HEBREW LETTER ALEF u'\u05d1' # 0x42 -> HEBREW LETTER BET @@ -106,12 +106,12 @@ u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN u'\u05d7' # 0x48 -> HEBREW LETTER HET u'\u05d8' # 0x49 -> HEBREW LETTER TET - u'\xa2' # 0x4a -> CENT SIGN - u'.' # 0x4b -> FULL STOP - u'<' # 0x4c -> LESS-THAN SIGN - u'(' # 0x4d -> LEFT PARENTHESIS - u'+' # 0x4e -> PLUS SIGN - u'|' # 0x4f -> VERTICAL LINE + u'\xa2' # 0x4A -> CENT SIGN + u'.' 
# 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE u'&' # 0x50 -> AMPERSAND u'\u05d9' # 0x51 -> HEBREW LETTER YOD u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF @@ -122,12 +122,12 @@ u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN u'\u05e0' # 0x58 -> HEBREW LETTER NUN u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH - u'!' # 0x5a -> EXCLAMATION MARK - u'$' # 0x5b -> DOLLAR SIGN - u'*' # 0x5c -> ASTERISK - u')' # 0x5d -> RIGHT PARENTHESIS - u';' # 0x5e -> SEMICOLON - u'\xac' # 0x5f -> NOT SIGN + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\u05e2' # 0x62 -> HEBREW LETTER AYIN @@ -138,12 +138,12 @@ u'\u05e7' # 0x67 -> HEBREW LETTER QOF u'\u05e8' # 0x68 -> HEBREW LETTER RESH u'\u05e9' # 0x69 -> HEBREW LETTER SHIN - u'\xa6' # 0x6a -> BROKEN BAR - u',' # 0x6b -> COMMA - u'%' # 0x6c -> PERCENT SIGN - u'_' # 0x6d -> LOW LINE - u'>' # 0x6e -> GREATER-THAN SIGN - u'?' # 0x6f -> QUESTION MARK + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK u'\ufffe' # 0x70 -> UNDEFINED u'\u05ea' # 0x71 -> HEBREW LETTER TAV u'\ufffe' # 0x72 -> UNDEFINED @@ -154,12 +154,12 @@ u'\ufffe' # 0x77 -> UNDEFINED u'\u2017' # 0x78 -> DOUBLE LOW LINE u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7a -> COLON - u'#' # 0x7b -> NUMBER SIGN - u'@' # 0x7c -> COMMERCIAL AT - u"'" # 0x7d -> APOSTROPHE - u'=' # 0x7e -> EQUALS SIGN - u'"' # 0x7f -> QUOTATION MARK + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK u'\ufffe' # 0x80 -> UNDEFINED u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B @@ -170,12 +170,12 @@ u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\ufffe' # 0x8c -> UNDEFINED - u'\ufffe' # 0x8d -> UNDEFINED - u'\ufffe' # 0x8e -> UNDEFINED - u'\xb1' # 0x8f -> PLUS-MINUS SIGN + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\xb1' # 0x8F -> PLUS-MINUS SIGN u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K @@ -186,108 +186,108 @@ u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER R - u'\ufffe' # 0x9a -> UNDEFINED - u'\ufffe' # 0x9b -> UNDEFINED - u'\ufffe' # 0x9c -> UNDEFINED - u'\xb8' # 0x9d -> CEDILLA - u'\ufffe' # 0x9e -> UNDEFINED - u'\xa4' # 0x9f -> CURRENCY SIGN - u'\xb5' # 0xa0 -> MICRO SIGN - u'~' # 0xa1 -> TILDE - u's' # 0xa2 -> LATIN SMALL LETTER S - u't' # 0xa3 -> LATIN SMALL LETTER T - u'u' # 0xa4 -> LATIN SMALL LETTER U - u'v' # 0xa5 -> LATIN SMALL LETTER V - u'w' # 0xa6 -> LATIN SMALL LETTER W - 
u'x' # 0xa7 -> LATIN SMALL LETTER X - u'y' # 0xa8 -> LATIN SMALL LETTER Y - u'z' # 0xa9 -> LATIN SMALL LETTER Z - u'\ufffe' # 0xaa -> UNDEFINED - u'\ufffe' # 0xab -> UNDEFINED - u'\ufffe' # 0xac -> UNDEFINED - u'\ufffe' # 0xad -> UNDEFINED - u'\ufffe' # 0xae -> UNDEFINED - u'\xae' # 0xaf -> REGISTERED SIGN - u'^' # 0xb0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xb1 -> POUND SIGN - u'\xa5' # 0xb2 -> YEN SIGN - u'\xb7' # 0xb3 -> MIDDLE DOT - u'\xa9' # 0xb4 -> COPYRIGHT SIGN - u'\xa7' # 0xb5 -> SECTION SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xba -> LEFT SQUARE BRACKET - u']' # 0xbb -> RIGHT SQUARE BRACKET - u'\xaf' # 0xbc -> MACRON - u'\xa8' # 0xbd -> DIAERESIS - u'\xb4' # 0xbe -> ACUTE ACCENT - u'\xd7' # 0xbf -> MULTIPLICATION SIGN - u'{' # 0xc0 -> LEFT CURLY BRACKET - u'A' # 0xc1 -> LATIN CAPITAL LETTER A - u'B' # 0xc2 -> LATIN CAPITAL LETTER B - u'C' # 0xc3 -> LATIN CAPITAL LETTER C - u'D' # 0xc4 -> LATIN CAPITAL LETTER D - u'E' # 0xc5 -> LATIN CAPITAL LETTER E - u'F' # 0xc6 -> LATIN CAPITAL LETTER F - u'G' # 0xc7 -> LATIN CAPITAL LETTER G - u'H' # 0xc8 -> LATIN CAPITAL LETTER H - u'I' # 0xc9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xca -> SOFT HYPHEN - u'\ufffe' # 0xcb -> UNDEFINED - u'\ufffe' # 0xcc -> UNDEFINED - u'\ufffe' # 0xcd -> UNDEFINED - u'\ufffe' # 0xce -> UNDEFINED - u'\ufffe' # 0xcf -> UNDEFINED - u'}' # 0xd0 -> RIGHT CURLY BRACKET - u'J' # 0xd1 -> LATIN CAPITAL LETTER J - u'K' # 0xd2 -> LATIN CAPITAL LETTER K - u'L' # 0xd3 -> LATIN CAPITAL LETTER L - u'M' # 0xd4 -> LATIN CAPITAL LETTER M - u'N' # 0xd5 -> LATIN CAPITAL LETTER N - u'O' # 0xd6 -> LATIN CAPITAL LETTER O - u'P' # 0xd7 -> LATIN CAPITAL LETTER P - u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q - u'R' # 0xd9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xda -> SUPERSCRIPT ONE - u'\ufffe' # 0xdb -> UNDEFINED - u'\ufffe' # 0xdc -> UNDEFINED - u'\ufffe' # 0xdd -> 
UNDEFINED - u'\ufffe' # 0xde -> UNDEFINED - u'\ufffe' # 0xdf -> UNDEFINED - u'\\' # 0xe0 -> REVERSE SOLIDUS - u'\xf7' # 0xe1 -> DIVISION SIGN - u'S' # 0xe2 -> LATIN CAPITAL LETTER S - u'T' # 0xe3 -> LATIN CAPITAL LETTER T - u'U' # 0xe4 -> LATIN CAPITAL LETTER U - u'V' # 0xe5 -> LATIN CAPITAL LETTER V - u'W' # 0xe6 -> LATIN CAPITAL LETTER W - u'X' # 0xe7 -> LATIN CAPITAL LETTER X - u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xea -> SUPERSCRIPT TWO - u'\ufffe' # 0xeb -> UNDEFINED - u'\ufffe' # 0xec -> UNDEFINED - u'\ufffe' # 0xed -> UNDEFINED - u'\ufffe' # 0xee -> UNDEFINED - u'\ufffe' # 0xef -> UNDEFINED - u'0' # 0xf0 -> DIGIT ZERO - u'1' # 0xf1 -> DIGIT ONE - u'2' # 0xf2 -> DIGIT TWO - u'3' # 0xf3 -> DIGIT THREE - u'4' # 0xf4 -> DIGIT FOUR - u'5' # 0xf5 -> DIGIT FIVE - u'6' # 0xf6 -> DIGIT SIX - u'7' # 0xf7 -> DIGIT SEVEN - u'8' # 0xf8 -> DIGIT EIGHT - u'9' # 0xf9 -> DIGIT NINE - u'\xb3' # 0xfa -> SUPERSCRIPT THREE - u'\ufffe' # 0xfb -> UNDEFINED - u'\ufffe' # 0xfc -> UNDEFINED - u'\ufffe' # 0xfd -> UNDEFINED - u'\ufffe' # 0xfe -> UNDEFINED - u'\x9f' # 0xff -> EIGHT ONES + u'\ufffe' # 0x9A -> UNDEFINED + u'\ufffe' # 0x9B -> UNDEFINED + u'\ufffe' # 0x9C -> UNDEFINED + u'\xb8' # 0x9D -> CEDILLA + u'\ufffe' # 0x9E -> UNDEFINED + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\ufffe' # 0xAA -> UNDEFINED + u'\ufffe' # 0xAB -> UNDEFINED + u'\ufffe' # 0xAC -> UNDEFINED + u'\ufffe' # 0xAD -> UNDEFINED + u'\ufffe' # 0xAE -> UNDEFINED + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> 
MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\ufffe' # 0xCB -> UNDEFINED + u'\ufffe' # 0xCC -> UNDEFINED + u'\ufffe' # 0xCD -> UNDEFINED + u'\ufffe' # 0xCE -> UNDEFINED + u'\ufffe' # 0xCF -> UNDEFINED + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\ufffe' # 0xDF -> UNDEFINED + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 
0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\ufffe' # 0xEB -> UNDEFINED + u'\ufffe' # 0xEC -> UNDEFINED + u'\ufffe' # 0xED -> UNDEFINED + u'\ufffe' # 0xEE -> UNDEFINED + u'\ufffe' # 0xEF -> UNDEFINED + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\ufffe' # 0xFB -> UNDEFINED + u'\ufffe' # 0xFC -> UNDEFINED + u'\ufffe' # 0xFD -> UNDEFINED + u'\ufffe' # 0xFE -> UNDEFINED + u'\x9f' # 0xFF -> EIGHT ONES ) ### Encoding Map @@ -298,97 +298,97 @@ 0x0002: 0x02, # START OF TEXT 0x0003: 0x03, # END OF TEXT 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2d, # ENQUIRY - 0x0006: 0x2e, # ACKNOWLEDGE - 0x0007: 0x2f, # BELL + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL 0x0008: 0x16, # BACKSPACE 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000a: 0x25, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3c, # DEVICE CONTROL FOUR - 0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE 0x0016: 0x32, # SYNCHRONOUS IDLE 0x0017: 0x26, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x3f, # SUBSTITUTE - 0x001b: 0x27, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD 
SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x40, # SPACE - 0x0021: 0x5a, # EXCLAMATION MARK - 0x0022: 0x7f, # QUOTATION MARK - 0x0023: 0x7b, # NUMBER SIGN - 0x0024: 0x5b, # DOLLAR SIGN - 0x0025: 0x6c, # PERCENT SIGN + 0x0021: 0x5A, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7d, # APOSTROPHE - 0x0028: 0x4d, # LEFT PARENTHESIS - 0x0029: 0x5d, # RIGHT PARENTHESIS - 0x002a: 0x5c, # ASTERISK - 0x002b: 0x4e, # PLUS SIGN - 0x002c: 0x6b, # COMMA - 0x002d: 0x60, # HYPHEN-MINUS - 0x002e: 0x4b, # FULL STOP - 0x002f: 0x61, # SOLIDUS - 0x0030: 0xf0, # DIGIT ZERO - 0x0031: 0xf1, # DIGIT ONE - 0x0032: 0xf2, # DIGIT TWO - 0x0033: 0xf3, # DIGIT THREE - 0x0034: 0xf4, # DIGIT FOUR - 0x0035: 0xf5, # DIGIT FIVE - 0x0036: 0xf6, # DIGIT SIX - 0x0037: 0xf7, # DIGIT SEVEN - 0x0038: 0xf8, # DIGIT EIGHT - 0x0039: 0xf9, # DIGIT NINE - 0x003a: 0x7a, # COLON - 0x003b: 0x5e, # SEMICOLON - 0x003c: 0x4c, # LESS-THAN SIGN - 0x003d: 0x7e, # EQUALS SIGN - 0x003e: 0x6e, # GREATER-THAN SIGN - 0x003f: 0x6f, # QUESTION MARK - 0x0040: 0x7c, # COMMERCIAL AT - 0x0041: 0xc1, # LATIN CAPITAL LETTER A - 0x0042: 0xc2, # LATIN CAPITAL LETTER B - 0x0043: 0xc3, # LATIN CAPITAL LETTER C - 0x0044: 0xc4, # LATIN CAPITAL LETTER D - 0x0045: 0xc5, # LATIN CAPITAL LETTER E - 0x0046: 0xc6, # LATIN CAPITAL LETTER F - 0x0047: 0xc7, # LATIN CAPITAL LETTER G - 0x0048: 0xc8, # LATIN CAPITAL LETTER H - 0x0049: 0xc9, # LATIN CAPITAL LETTER I - 0x004a: 0xd1, # LATIN CAPITAL LETTER J - 0x004b: 0xd2, # LATIN CAPITAL LETTER K - 0x004c: 0xd3, # LATIN CAPITAL LETTER L - 0x004d: 0xd4, # LATIN CAPITAL LETTER M - 0x004e: 0xd5, # LATIN CAPITAL LETTER N - 0x004f: 0xd6, # LATIN CAPITAL LETTER O - 0x0050: 0xd7, # 
LATIN CAPITAL LETTER P - 0x0051: 0xd8, # LATIN CAPITAL LETTER Q - 0x0052: 0xd9, # LATIN CAPITAL LETTER R - 0x0053: 0xe2, # LATIN CAPITAL LETTER S - 0x0054: 0xe3, # LATIN CAPITAL LETTER T - 0x0055: 0xe4, # LATIN CAPITAL LETTER U - 0x0056: 0xe5, # LATIN CAPITAL LETTER V - 0x0057: 0xe6, # LATIN CAPITAL LETTER W - 0x0058: 0xe7, # LATIN CAPITAL LETTER X - 0x0059: 0xe8, # LATIN CAPITAL LETTER Y - 0x005a: 0xe9, # LATIN CAPITAL LETTER Z - 0x005b: 0xba, # LEFT SQUARE BRACKET - 0x005c: 0xe0, # REVERSE SOLIDUS - 0x005d: 0xbb, # RIGHT SQUARE BRACKET - 0x005e: 0xb0, # CIRCUMFLEX ACCENT - 0x005f: 0x6d, # LOW LINE + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 
0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0xBA, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0xBB, # RIGHT SQUARE BRACKET + 0x005E: 0xB0, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE 0x0060: 0x79, # GRAVE ACCENT 0x0061: 0x81, # LATIN SMALL LETTER A 0x0062: 0x82, # LATIN SMALL LETTER B @@ -399,28 +399,28 @@ 0x0067: 0x87, # LATIN SMALL LETTER G 0x0068: 0x88, # LATIN SMALL LETTER H 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006a: 0x91, # LATIN SMALL LETTER J - 0x006b: 0x92, # LATIN SMALL LETTER K - 0x006c: 0x93, # LATIN SMALL LETTER L - 0x006d: 0x94, # LATIN SMALL LETTER M - 0x006e: 0x95, # LATIN SMALL LETTER N - 0x006f: 0x96, # LATIN SMALL LETTER O + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O 0x0070: 0x97, # LATIN SMALL LETTER P 0x0071: 0x98, # LATIN SMALL LETTER Q 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xa2, # LATIN SMALL LETTER S - 0x0074: 0xa3, # LATIN SMALL LETTER T - 0x0075: 0xa4, # LATIN SMALL LETTER U - 0x0076: 0xa5, # LATIN SMALL LETTER V - 0x0077: 0xa6, # LATIN SMALL LETTER W - 0x0078: 0xa7, # LATIN SMALL LETTER X - 0x0079: 0xa8, # LATIN SMALL LETTER Y - 0x007a: 0xa9, # LATIN SMALL LETTER Z - 0x007b: 0xc0, # LEFT CURLY BRACKET - 0x007c: 0x4f, # VERTICAL LINE - 0x007d: 0xd0, # RIGHT CURLY BRACKET - 0x007e: 0xa1, # TILDE - 0x007f: 0x07, # DELETE + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, 
# LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x4F, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE 0x0080: 0x20, # DIGIT SELECT 0x0081: 0x21, # START OF SIGNIFICANCE 0x0082: 0x22, # FIELD SEPARATOR @@ -431,15 +431,15 @@ 0x0087: 0x17, # PROGRAM OPERATOR COMMUNICATION 0x0088: 0x28, # SET ATTRIBUTE 0x0089: 0x29, # START FIELD EXTENDED - 0x008a: 0x2a, # SET MODE OR SWITCH - 0x008b: 0x2b, # CONTROL SEQUENCE PREFIX - 0x008c: 0x2c, # MODIFY FIELD ATTRIBUTE - 0x008d: 0x09, # SUPERSCRIPT - 0x008e: 0x0a, # REPEAT - 0x008f: 0x1b, # CUSTOMER USE ONE + 0x008A: 0x2A, # SET MODE OR SWITCH + 0x008B: 0x2B, # CONTROL SEQUENCE PREFIX + 0x008C: 0x2C, # MODIFY FIELD ATTRIBUTE + 0x008D: 0x09, # SUPERSCRIPT + 0x008E: 0x0A, # REPEAT + 0x008F: 0x1B, # CUSTOMER USE ONE 0x0090: 0x30, # 0x0091: 0x31, # - 0x0092: 0x1a, # UNIT BACK SPACE + 0x0092: 0x1A, # UNIT BACK SPACE 0x0093: 0x33, # INDEX RETURN 0x0094: 0x34, # PRESENTATION POSITION 0x0095: 0x35, # TRANSPARENT @@ -447,68 +447,69 @@ 0x0097: 0x08, # GRAPHIC ESCAPE 0x0098: 0x38, # SUBSCRIPT 0x0099: 0x39, # INDENT TABULATION - 0x009a: 0x3a, # REVERSE FORM FEED - 0x009b: 0x3b, # CUSTOMER USE THREE - 0x009c: 0x04, # SELECT - 0x009d: 0x14, # RESTORE/ENABLE PRESENTATION - 0x009e: 0x3e, # - 0x009f: 0xff, # EIGHT ONES - 0x00a0: 0x74, # NO-BREAK SPACE - 0x00a2: 0x4a, # CENT SIGN - 0x00a3: 0xb1, # POUND SIGN - 0x00a4: 0x9f, # CURRENCY SIGN - 0x00a5: 0xb2, # YEN SIGN - 0x00a6: 0x6a, # BROKEN BAR - 0x00a7: 0xb5, # SECTION SIGN - 0x00a8: 0xbd, # DIAERESIS - 0x00a9: 0xb4, # COPYRIGHT SIGN - 0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x5f, # NOT SIGN - 0x00ad: 0xca, # SOFT HYPHEN - 0x00ae: 0xaf, # REGISTERED SIGN - 0x00af: 0xbc, # MACRON - 0x00b0: 0x90, # DEGREE 
SIGN - 0x00b1: 0x8f, # PLUS-MINUS SIGN - 0x00b2: 0xea, # SUPERSCRIPT TWO - 0x00b3: 0xfa, # SUPERSCRIPT THREE - 0x00b4: 0xbe, # ACUTE ACCENT - 0x00b5: 0xa0, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb3, # MIDDLE DOT - 0x00b8: 0x9d, # CEDILLA - 0x00b9: 0xda, # SUPERSCRIPT ONE - 0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xb8, # VULGAR FRACTION ONE HALF - 0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS - 0x00d7: 0xbf, # MULTIPLICATION SIGN - 0x00f7: 0xe1, # DIVISION SIGN - 0x05d0: 0x41, # HEBREW LETTER ALEF - 0x05d1: 0x42, # HEBREW LETTER BET - 0x05d2: 0x43, # HEBREW LETTER GIMEL - 0x05d3: 0x44, # HEBREW LETTER DALET - 0x05d4: 0x45, # HEBREW LETTER HE - 0x05d5: 0x46, # HEBREW LETTER VAV - 0x05d6: 0x47, # HEBREW LETTER ZAYIN - 0x05d7: 0x48, # HEBREW LETTER HET - 0x05d8: 0x49, # HEBREW LETTER TET - 0x05d9: 0x51, # HEBREW LETTER YOD - 0x05da: 0x52, # HEBREW LETTER FINAL KAF - 0x05db: 0x53, # HEBREW LETTER KAF - 0x05dc: 0x54, # HEBREW LETTER LAMED - 0x05dd: 0x55, # HEBREW LETTER FINAL MEM - 0x05de: 0x56, # HEBREW LETTER MEM - 0x05df: 0x57, # HEBREW LETTER FINAL NUN - 0x05e0: 0x58, # HEBREW LETTER NUN - 0x05e1: 0x59, # HEBREW LETTER SAMEKH - 0x05e2: 0x62, # HEBREW LETTER AYIN - 0x05e3: 0x63, # HEBREW LETTER FINAL PE - 0x05e4: 0x64, # HEBREW LETTER PE - 0x05e5: 0x65, # HEBREW LETTER FINAL TSADI - 0x05e6: 0x66, # HEBREW LETTER TSADI - 0x05e7: 0x67, # HEBREW LETTER QOF - 0x05e8: 0x68, # HEBREW LETTER RESH - 0x05e9: 0x69, # HEBREW LETTER SHIN - 0x05ea: 0x71, # HEBREW LETTER TAV + 0x009A: 0x3A, # REVERSE FORM FEED + 0x009B: 0x3B, # CUSTOMER USE THREE + 0x009C: 0x04, # SELECT + 0x009D: 0x14, # RESTORE/ENABLE PRESENTATION + 0x009E: 0x3E, # + 0x009F: 0xFF, # EIGHT ONES + 0x00A0: 0x74, # NO-BREAK SPACE + 0x00A2: 0x4A, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # 
DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0x5F, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00F7: 0xE1, # DIVISION SIGN + 0x05D0: 0x41, # HEBREW LETTER ALEF + 0x05D1: 0x42, # HEBREW LETTER BET + 0x05D2: 0x43, # HEBREW LETTER GIMEL + 0x05D3: 0x44, # HEBREW LETTER DALET + 0x05D4: 0x45, # HEBREW LETTER HE + 0x05D5: 0x46, # HEBREW LETTER VAV + 0x05D6: 0x47, # HEBREW LETTER ZAYIN + 0x05D7: 0x48, # HEBREW LETTER HET + 0x05D8: 0x49, # HEBREW LETTER TET + 0x05D9: 0x51, # HEBREW LETTER YOD + 0x05DA: 0x52, # HEBREW LETTER FINAL KAF + 0x05DB: 0x53, # HEBREW LETTER KAF + 0x05DC: 0x54, # HEBREW LETTER LAMED + 0x05DD: 0x55, # HEBREW LETTER FINAL MEM + 0x05DE: 0x56, # HEBREW LETTER MEM + 0x05DF: 0x57, # HEBREW LETTER FINAL NUN + 0x05E0: 0x58, # HEBREW LETTER NUN + 0x05E1: 0x59, # HEBREW LETTER SAMEKH + 0x05E2: 0x62, # HEBREW LETTER AYIN + 0x05E3: 0x63, # HEBREW LETTER FINAL PE + 0x05E4: 0x64, # HEBREW LETTER PE + 0x05E5: 0x65, # HEBREW LETTER FINAL TSADI + 0x05E6: 0x66, # HEBREW LETTER TSADI + 0x05E7: 0x67, # HEBREW LETTER QOF + 0x05E8: 0x68, # HEBREW LETTER RESH + 0x05E9: 0x69, # HEBREW LETTER SHIN + 0x05EA: 0x71, # HEBREW LETTER TAV 0x2017: 0x78, # DOUBLE LOW LINE -} \ No newline at end of file +} + Index: cp500.py =================================================================== RCS file: 
/cvsroot/python/python/dist/src/Lib/encodings/cp500.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp500.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp500.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x7f' # 0x07 -> DELETE u'\x97' # 0x08 -> CONTROL u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0a -> CONTROL - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x87' # 0x17 -> CONTROL u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1a -> CONTROL - u'\x8f' # 0x1b -> CONTROL - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> CONTROL u'\x81' # 0x21 -> CONTROL u'\x82' # 0x22 -> CONTROL @@ -74,12 +74,12 @@ u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> CONTROL u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2a -> CONTROL - u'\x8b' # 0x2b -> CONTROL - u'\x8c' # 0x2c -> CONTROL - u'\x05' # 0x2d -> ENQUIRY - u'\x06' # 0x2e -> ACKNOWLEDGE - u'\x07' # 0x2f -> BELL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> CONTROL u'\x91' # 0x31 -> CONTROL u'\x16' # 0x32 -> SYNCHRONOUS IDLE @@ -90,12 +90,12 @@ u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> CONTROL u'\x99' # 
0x39 -> CONTROL - u'\x9a' # 0x3a -> CONTROL - u'\x9b' # 0x3b -> CONTROL - u'\x14' # 0x3c -> DEVICE CONTROL FOUR - u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3e -> CONTROL - u'\x1a' # 0x3f -> SUBSTITUTE + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\xa0' # 0x41 -> NO-BREAK SPACE u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX @@ -106,12 +106,12 @@ u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'[' # 0x4a -> LEFT SQUARE BRACKET - u'.' # 0x4b -> FULL STOP - u'<' # 0x4c -> LESS-THAN SIGN - u'(' # 0x4d -> LEFT PARENTHESIS - u'+' # 0x4e -> PLUS SIGN - u'!' # 0x4f -> EXCLAMATION MARK + u'[' # 0x4A -> LEFT SQUARE BRACKET + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' 
# 0x4F -> EXCLAMATION MARK u'&' # 0x50 -> AMPERSAND u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX @@ -122,12 +122,12 @@ u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u']' # 0x5a -> RIGHT SQUARE BRACKET - u'$' # 0x5b -> DOLLAR SIGN - u'*' # 0x5c -> ASTERISK - u')' # 0x5d -> RIGHT PARENTHESIS - u';' # 0x5e -> SEMICOLON - u'^' # 0x5f -> CIRCUMFLEX ACCENT + u']' # 0x5A -> RIGHT SQUARE BRACKET + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX @@ -138,12 +138,12 @@ u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6a -> BROKEN BAR - u',' # 0x6b -> COMMA - u'%' # 0x6c -> PERCENT SIGN - u'_' # 0x6d -> LOW LINE - u'>' # 0x6e -> GREATER-THAN SIGN - u'?' # 0x6f -> QUESTION MARK + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' 
# 0x6F -> QUESTION MARK u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX @@ -154,12 +154,12 @@ u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7a -> COLON - u'#' # 0x7b -> NUMBER SIGN - u'@' # 0x7c -> COMMERCIAL AT - u"'" # 0x7d -> APOSTROPHE - u'=' # 0x7e -> EQUALS SIGN - u'"' # 0x7f -> QUOTATION MARK + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B @@ -170,12 +170,12 @@ u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8c -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8d -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8e -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8f -> PLUS-MINUS SIGN + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K @@ -186,108 +186,108 @@ u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9a -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9b -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9c -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9d -> CEDILLA - 
u'\xc6' # 0x9e -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9f -> CURRENCY SIGN - u'\xb5' # 0xa0 -> MICRO SIGN - u'~' # 0xa1 -> TILDE - u's' # 0xa2 -> LATIN SMALL LETTER S - u't' # 0xa3 -> LATIN SMALL LETTER T - u'u' # 0xa4 -> LATIN SMALL LETTER U - u'v' # 0xa5 -> LATIN SMALL LETTER V - u'w' # 0xa6 -> LATIN SMALL LETTER W - u'x' # 0xa7 -> LATIN SMALL LETTER X - u'y' # 0xa8 -> LATIN SMALL LETTER Y - u'z' # 0xa9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xaa -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xab -> INVERTED QUESTION MARK - u'\xd0' # 0xac -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xad -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xae -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xaf -> REGISTERED SIGN - u'\xa2' # 0xb0 -> CENT SIGN - u'\xa3' # 0xb1 -> POUND SIGN - u'\xa5' # 0xb2 -> YEN SIGN - u'\xb7' # 0xb3 -> MIDDLE DOT - u'\xa9' # 0xb4 -> COPYRIGHT SIGN - u'\xa7' # 0xb5 -> SECTION SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xba -> NOT SIGN - u'|' # 0xbb -> VERTICAL LINE - u'\xaf' # 0xbc -> MACRON - u'\xa8' # 0xbd -> DIAERESIS - u'\xb4' # 0xbe -> ACUTE ACCENT - u'\xd7' # 0xbf -> MULTIPLICATION SIGN - u'{' # 0xc0 -> LEFT CURLY BRACKET - u'A' # 0xc1 -> LATIN CAPITAL LETTER A - u'B' # 0xc2 -> LATIN CAPITAL LETTER B - u'C' # 0xc3 -> LATIN CAPITAL LETTER C - u'D' # 0xc4 -> LATIN CAPITAL LETTER D - u'E' # 0xc5 -> LATIN CAPITAL LETTER E - u'F' # 0xc6 -> LATIN CAPITAL LETTER F - u'G' # 0xc7 -> LATIN CAPITAL LETTER G - u'H' # 0xc8 -> LATIN CAPITAL LETTER H - u'I' # 0xc9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xca -> SOFT HYPHEN - u'\xf4' # 0xcb -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xcc -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xcd -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xce -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xcf -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xd0 
-> RIGHT CURLY BRACKET - u'J' # 0xd1 -> LATIN CAPITAL LETTER J - u'K' # 0xd2 -> LATIN CAPITAL LETTER K - u'L' # 0xd3 -> LATIN CAPITAL LETTER L - u'M' # 0xd4 -> LATIN CAPITAL LETTER M - u'N' # 0xd5 -> LATIN CAPITAL LETTER N - u'O' # 0xd6 -> LATIN CAPITAL LETTER O - u'P' # 0xd7 -> LATIN CAPITAL LETTER P - u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q - u'R' # 0xd9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xda -> SUPERSCRIPT ONE - u'\xfb' # 0xdb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xdc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xdd -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xde -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xdf -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xe0 -> REVERSE SOLIDUS - u'\xf7' # 0xe1 -> DIVISION SIGN - u'S' # 0xe2 -> LATIN CAPITAL LETTER S - u'T' # 0xe3 -> LATIN CAPITAL LETTER T - u'U' # 0xe4 -> LATIN CAPITAL LETTER U - u'V' # 0xe5 -> LATIN CAPITAL LETTER V - u'W' # 0xe6 -> LATIN CAPITAL LETTER W - u'X' # 0xe7 -> LATIN CAPITAL LETTER X - u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xea -> SUPERSCRIPT TWO - u'\xd4' # 0xeb -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xec -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xed -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xef -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xf0 -> DIGIT ZERO - u'1' # 0xf1 -> DIGIT ONE - u'2' # 0xf2 -> DIGIT TWO - u'3' # 0xf3 -> DIGIT THREE - u'4' # 0xf4 -> DIGIT FOUR - u'5' # 0xf5 -> DIGIT FIVE - u'6' # 0xf6 -> DIGIT SIX - u'7' # 0xf7 -> DIGIT SEVEN - u'8' # 0xf8 -> DIGIT EIGHT - u'9' # 0xf9 -> DIGIT NINE - u'\xb3' # 0xfa -> SUPERSCRIPT THREE - u'\xdb' # 0xfb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xfc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xfd -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xfe -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xff -> CONTROL + u'\xaa' # 0x9A -> FEMININE 
ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'\xa2' # 0xB0 -> CENT SIGN + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'\xac' # 0xBA -> NOT SIGN + u'|' # 0xBB -> VERTICAL LINE + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL 
LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN 
CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map @@ -298,97 +298,97 @@ 0x0002: 0x02, # START OF TEXT 0x0003: 0x03, # END OF TEXT 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2d, # ENQUIRY - 0x0006: 0x2e, # ACKNOWLEDGE - 0x0007: 0x2f, # BELL + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL 0x0008: 0x16, # BACKSPACE 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000a: 0x25, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3c, # DEVICE CONTROL FOUR - 0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE 0x0016: 0x32, # SYNCHRONOUS IDLE 0x0017: 0x26, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x3f, # SUBSTITUTE - 0x001b: 0x27, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x3F, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x40, # SPACE - 0x0021: 0x4f, # EXCLAMATION MARK - 0x0022: 0x7f, # QUOTATION MARK - 0x0023: 0x7b, # NUMBER SIGN - 0x0024: 0x5b, # DOLLAR SIGN - 0x0025: 0x6c, # PERCENT SIGN + 0x0021: 0x4F, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN 0x0026: 0x50, # AMPERSAND 
- 0x0027: 0x7d, # APOSTROPHE - 0x0028: 0x4d, # LEFT PARENTHESIS - 0x0029: 0x5d, # RIGHT PARENTHESIS - 0x002a: 0x5c, # ASTERISK - 0x002b: 0x4e, # PLUS SIGN - 0x002c: 0x6b, # COMMA - 0x002d: 0x60, # HYPHEN-MINUS - 0x002e: 0x4b, # FULL STOP - 0x002f: 0x61, # SOLIDUS - 0x0030: 0xf0, # DIGIT ZERO - 0x0031: 0xf1, # DIGIT ONE - 0x0032: 0xf2, # DIGIT TWO - 0x0033: 0xf3, # DIGIT THREE - 0x0034: 0xf4, # DIGIT FOUR - 0x0035: 0xf5, # DIGIT FIVE - 0x0036: 0xf6, # DIGIT SIX - 0x0037: 0xf7, # DIGIT SEVEN - 0x0038: 0xf8, # DIGIT EIGHT - 0x0039: 0xf9, # DIGIT NINE - 0x003a: 0x7a, # COLON - 0x003b: 0x5e, # SEMICOLON - 0x003c: 0x4c, # LESS-THAN SIGN - 0x003d: 0x7e, # EQUALS SIGN - 0x003e: 0x6e, # GREATER-THAN SIGN - 0x003f: 0x6f, # QUESTION MARK - 0x0040: 0x7c, # COMMERCIAL AT - 0x0041: 0xc1, # LATIN CAPITAL LETTER A - 0x0042: 0xc2, # LATIN CAPITAL LETTER B - 0x0043: 0xc3, # LATIN CAPITAL LETTER C - 0x0044: 0xc4, # LATIN CAPITAL LETTER D - 0x0045: 0xc5, # LATIN CAPITAL LETTER E - 0x0046: 0xc6, # LATIN CAPITAL LETTER F - 0x0047: 0xc7, # LATIN CAPITAL LETTER G - 0x0048: 0xc8, # LATIN CAPITAL LETTER H - 0x0049: 0xc9, # LATIN CAPITAL LETTER I - 0x004a: 0xd1, # LATIN CAPITAL LETTER J - 0x004b: 0xd2, # LATIN CAPITAL LETTER K - 0x004c: 0xd3, # LATIN CAPITAL LETTER L - 0x004d: 0xd4, # LATIN CAPITAL LETTER M - 0x004e: 0xd5, # LATIN CAPITAL LETTER N - 0x004f: 0xd6, # LATIN CAPITAL LETTER O - 0x0050: 0xd7, # LATIN CAPITAL LETTER P - 0x0051: 0xd8, # LATIN CAPITAL LETTER Q - 0x0052: 0xd9, # LATIN CAPITAL LETTER R - 0x0053: 0xe2, # LATIN CAPITAL LETTER S - 0x0054: 0xe3, # LATIN CAPITAL LETTER T - 0x0055: 0xe4, # LATIN CAPITAL LETTER U - 0x0056: 0xe5, # LATIN CAPITAL LETTER V - 0x0057: 0xe6, # LATIN CAPITAL LETTER W - 0x0058: 0xe7, # LATIN CAPITAL LETTER X - 0x0059: 0xe8, # LATIN CAPITAL LETTER Y - 0x005a: 0xe9, # LATIN CAPITAL LETTER Z - 0x005b: 0x4a, # LEFT SQUARE BRACKET - 0x005c: 0xe0, # REVERSE SOLIDUS - 0x005d: 0x5a, # RIGHT SQUARE BRACKET - 0x005e: 0x5f, # CIRCUMFLEX ACCENT - 0x005f: 0x6d, # 
LOW LINE + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0x4A, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0x5A, # RIGHT SQUARE BRACKET + 0x005E: 0x5F, # CIRCUMFLEX ACCENT + 
0x005F: 0x6D, # LOW LINE 0x0060: 0x79, # GRAVE ACCENT 0x0061: 0x81, # LATIN SMALL LETTER A 0x0062: 0x82, # LATIN SMALL LETTER B @@ -399,28 +399,28 @@ 0x0067: 0x87, # LATIN SMALL LETTER G 0x0068: 0x88, # LATIN SMALL LETTER H 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006a: 0x91, # LATIN SMALL LETTER J - 0x006b: 0x92, # LATIN SMALL LETTER K - 0x006c: 0x93, # LATIN SMALL LETTER L - 0x006d: 0x94, # LATIN SMALL LETTER M - 0x006e: 0x95, # LATIN SMALL LETTER N - 0x006f: 0x96, # LATIN SMALL LETTER O + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O 0x0070: 0x97, # LATIN SMALL LETTER P 0x0071: 0x98, # LATIN SMALL LETTER Q 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xa2, # LATIN SMALL LETTER S - 0x0074: 0xa3, # LATIN SMALL LETTER T - 0x0075: 0xa4, # LATIN SMALL LETTER U - 0x0076: 0xa5, # LATIN SMALL LETTER V - 0x0077: 0xa6, # LATIN SMALL LETTER W - 0x0078: 0xa7, # LATIN SMALL LETTER X - 0x0079: 0xa8, # LATIN SMALL LETTER Y - 0x007a: 0xa9, # LATIN SMALL LETTER Z - 0x007b: 0xc0, # LEFT CURLY BRACKET - 0x007c: 0xbb, # VERTICAL LINE - 0x007d: 0xd0, # RIGHT CURLY BRACKET - 0x007e: 0xa1, # TILDE - 0x007f: 0x07, # DELETE + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0xBB, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE 0x0080: 0x20, # CONTROL 0x0081: 0x21, # CONTROL 0x0082: 0x22, # CONTROL @@ -431,15 +431,15 @@ 0x0087: 0x17, # CONTROL 0x0088: 0x28, # CONTROL 0x0089: 0x29, # CONTROL - 0x008a: 0x2a, # CONTROL - 0x008b: 0x2b, # CONTROL - 
0x008c: 0x2c, # CONTROL - 0x008d: 0x09, # CONTROL - 0x008e: 0x0a, # CONTROL - 0x008f: 0x1b, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL 0x0090: 0x30, # CONTROL 0x0091: 0x31, # CONTROL - 0x0092: 0x1a, # CONTROL + 0x0092: 0x1A, # CONTROL 0x0093: 0x33, # CONTROL 0x0094: 0x34, # CONTROL 0x0095: 0x35, # CONTROL @@ -447,106 +447,107 @@ 0x0097: 0x08, # CONTROL 0x0098: 0x38, # CONTROL 0x0099: 0x39, # CONTROL - 0x009a: 0x3a, # CONTROL - 0x009b: 0x3b, # CONTROL - 0x009c: 0x04, # CONTROL - 0x009d: 0x14, # CONTROL - 0x009e: 0x3e, # CONTROL - 0x009f: 0xff, # CONTROL - 0x00a0: 0x41, # NO-BREAK SPACE - 0x00a1: 0xaa, # INVERTED EXCLAMATION MARK - 0x00a2: 0xb0, # CENT SIGN - 0x00a3: 0xb1, # POUND SIGN - 0x00a4: 0x9f, # CURRENCY SIGN - 0x00a5: 0xb2, # YEN SIGN - 0x00a6: 0x6a, # BROKEN BAR - 0x00a7: 0xb5, # SECTION SIGN - 0x00a8: 0xbd, # DIAERESIS - 0x00a9: 0xb4, # COPYRIGHT SIGN - 0x00aa: 0x9a, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xba, # NOT SIGN - 0x00ad: 0xca, # SOFT HYPHEN - 0x00ae: 0xaf, # REGISTERED SIGN - 0x00af: 0xbc, # MACRON - 0x00b0: 0x90, # DEGREE SIGN - 0x00b1: 0x8f, # PLUS-MINUS SIGN - 0x00b2: 0xea, # SUPERSCRIPT TWO - 0x00b3: 0xfa, # SUPERSCRIPT THREE - 0x00b4: 0xbe, # ACUTE ACCENT - 0x00b5: 0xa0, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb3, # MIDDLE DOT - 0x00b8: 0x9d, # CEDILLA - 0x00b9: 0xda, # SUPERSCRIPT ONE - 0x00ba: 0x9b, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xb8, # VULGAR FRACTION ONE HALF - 0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xab, # INVERTED QUESTION MARK - 0x00c0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x66, # 
LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x9e, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xac, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00d1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xed, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xeb, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xef, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xec, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xbf, # MULTIPLICATION SIGN - 0x00d8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xfd, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xfe, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xfb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xfc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xad, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xae, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00df: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x9c, # LATIN SMALL LIGATURE AE - 0x00e7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x51, 
# LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xe1, # DIVISION SIGN - 0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS -} \ No newline at end of file + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x41, # NO-BREAK SPACE + 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK + 0x00A2: 0xB0, # CENT SIGN + 0x00A3: 0xB1, # POUND SIGN + 0x00A4: 0x9F, # CURRENCY SIGN + 0x00A5: 0xB2, # YEN SIGN + 0x00A6: 0x6A, # BROKEN BAR + 0x00A7: 0xB5, # SECTION SIGN + 0x00A8: 0xBD, # DIAERESIS + 0x00A9: 0xB4, # COPYRIGHT SIGN + 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR + 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xBA, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00AE: 0xAF, # REGISTERED SIGN + 0x00AF: 0xBC, # MACRON + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0x8F, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # SUPERSCRIPT TWO + 0x00B3: 
0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xBE, # ACUTE ACCENT + 0x00B5: 0xA0, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB3, # MIDDLE DOT + 0x00B8: 0x9D, # CEDILLA + 0x00B9: 0xDA, # SUPERSCRIPT ONE + 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF + 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xAB, # INVERTED QUESTION MARK + 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE + 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) + 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xBF, # MULTIPLICATION SIGN + 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xFC, # 
LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) + 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE + 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) + 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xE1, # DIVISION SIGN + 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) + 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS +} + Index: cp856.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp856.py,v 
retrieving revision 1.7 retrieving revision 1.8 diff -u -d -r1.7 -r1.8 --- cp856.py 24 Oct 2005 12:07:48 -0000 1.7 +++ cp856.py 24 Oct 2005 12:14:59 -0000 1.8 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u05d0' # 0x80 -> HEBREW LETTER ALEF u'\u05d1' # 0x81 -> HEBREW LETTER BET u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL @@ -170,12 +170,12 @@ u'\u05d7' # 0x87 -> HEBREW LETTER HET u'\u05d8' # 0x88 -> HEBREW LETTER TET u'\u05d9' # 0x89 -> HEBREW LETTER YOD - u'\u05da' # 0x8a -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x8b -> HEBREW LETTER KAF - u'\u05dc' # 0x8c -> HEBREW LETTER LAMED - u'\u05dd' # 0x8d -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x8e -> HEBREW LETTER MEM - u'\u05df' # 0x8f -> HEBREW LETTER FINAL NUN + u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x8B -> HEBREW LETTER KAF + u'\u05dc' # 0x8C -> HEBREW LETTER LAMED + u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x8E -> HEBREW LETTER 
MEM + u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN u'\u05e0' # 0x90 -> HEBREW LETTER NUN u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH u'\u05e2' # 0x92 -> HEBREW LETTER AYIN @@ -186,108 +186,108 @@ u'\u05e7' # 0x97 -> HEBREW LETTER QOF u'\u05e8' # 0x98 -> HEBREW LETTER RESH u'\u05e9' # 0x99 -> HEBREW LETTER SHIN - u'\u05ea' # 0x9a -> HEBREW LETTER TAV - u'\ufffe' # 0x9b -> UNDEFINED - u'\xa3' # 0x9c -> POUND SIGN - u'\ufffe' # 0x9d -> UNDEFINED - u'\xd7' # 0x9e -> MULTIPLICATION SIGN - u'\ufffe' # 0x9f -> UNDEFINED - u'\ufffe' # 0xa0 -> UNDEFINED - u'\ufffe' # 0xa1 -> UNDEFINED - u'\ufffe' # 0xa2 -> UNDEFINED - u'\ufffe' # 0xa3 -> UNDEFINED - u'\ufffe' # 0xa4 -> UNDEFINED - u'\ufffe' # 0xa5 -> UNDEFINED - u'\ufffe' # 0xa6 -> UNDEFINED - u'\ufffe' # 0xa7 -> UNDEFINED - u'\ufffe' # 0xa8 -> UNDEFINED - u'\xae' # 0xa9 -> REGISTERED SIGN - u'\xac' # 0xaa -> NOT SIGN - u'\xbd' # 0xab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0xac -> VULGAR FRACTION ONE QUARTER - u'\ufffe' # 0xad -> UNDEFINED - u'\xab' # 0xae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xaf -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0xb0 -> LIGHT SHADE - u'\u2592' # 0xb1 -> MEDIUM SHADE - u'\u2593' # 0xb2 -> DARK SHADE - u'\u2502' # 0xb3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0xb4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\ufffe' # 0xb5 -> UNDEFINED - u'\ufffe' # 0xb6 -> UNDEFINED - u'\ufffe' # 0xb7 -> UNDEFINED - u'\xa9' # 0xb8 -> COPYRIGHT SIGN - u'\u2563' # 0xb9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0xba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0xbb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0xbc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0xbd -> CENT SIGN - u'\xa5' # 0xbe -> YEN SIGN - u'\u2510' # 0xbf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0xc0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0xc1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0xc2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0xc3 -> BOX 
DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0xc4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0xc5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\ufffe' # 0xc6 -> UNDEFINED - u'\ufffe' # 0xc7 -> UNDEFINED - u'\u255a' # 0xc8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0xc9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0xca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0xcb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0xcc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0xcd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0xce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0xcf -> CURRENCY SIGN - u'\ufffe' # 0xd0 -> UNDEFINED - u'\ufffe' # 0xd1 -> UNDEFINED - u'\ufffe' # 0xd2 -> UNDEFINED - u'\ufffe' # 0xd3 -> UNDEFINEDS - u'\ufffe' # 0xd4 -> UNDEFINED - u'\ufffe' # 0xd5 -> UNDEFINED - u'\ufffe' # 0xd6 -> UNDEFINEDE - u'\ufffe' # 0xd7 -> UNDEFINED - u'\ufffe' # 0xd8 -> UNDEFINED - u'\u2518' # 0xd9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0xda -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0xdb -> FULL BLOCK - u'\u2584' # 0xdc -> LOWER HALF BLOCK - u'\xa6' # 0xdd -> BROKEN BAR - u'\ufffe' # 0xde -> UNDEFINED - u'\u2580' # 0xdf -> UPPER HALF BLOCK - u'\ufffe' # 0xe0 -> UNDEFINED - u'\ufffe' # 0xe1 -> UNDEFINED - u'\ufffe' # 0xe2 -> UNDEFINED - u'\ufffe' # 0xe3 -> UNDEFINED - u'\ufffe' # 0xe4 -> UNDEFINED - u'\ufffe' # 0xe5 -> UNDEFINED - u'\xb5' # 0xe6 -> MICRO SIGN - u'\ufffe' # 0xe7 -> UNDEFINED - u'\ufffe' # 0xe8 -> UNDEFINED - u'\ufffe' # 0xe9 -> UNDEFINED - u'\ufffe' # 0xea -> UNDEFINED - u'\ufffe' # 0xeb -> UNDEFINED - u'\ufffe' # 0xec -> UNDEFINED - u'\ufffe' # 0xed -> UNDEFINED - u'\xaf' # 0xee -> MACRON - u'\xb4' # 0xef -> ACUTE ACCENT - u'\xad' # 0xf0 -> SOFT HYPHEN - u'\xb1' # 0xf1 -> PLUS-MINUS SIGN - u'\u2017' # 0xf2 -> DOUBLE LOW LINE - u'\xbe' # 0xf3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0xf4 -> PILCROW SIGN - u'\xa7' # 0xf5 -> SECTION SIGN - u'\xf7' # 0xf6 -> DIVISION SIGN 
- u'\xb8' # 0xf7 -> CEDILLA - u'\xb0' # 0xf8 -> DEGREE SIGN - u'\xa8' # 0xf9 -> DIAERESIS - u'\xb7' # 0xfa -> MIDDLE DOT - u'\xb9' # 0xfb -> SUPERSCRIPT ONE - u'\xb3' # 0xfc -> SUPERSCRIPT THREE - u'\xb2' # 0xfd -> SUPERSCRIPT TWO - u'\u25a0' # 0xfe -> BLACK SQUARE - u'\xa0' # 0xff -> NO-BREAK SPACE + u'\u05ea' # 0x9A -> HEBREW LETTER TAV + u'\ufffe' # 0x9B -> UNDEFINED + u'\xa3' # 0x9C -> POUND SIGN + u'\ufffe' # 0x9D -> UNDEFINED + u'\xd7' # 0x9E -> MULTIPLICATION SIGN + u'\ufffe' # 0x9F -> UNDEFINED + u'\ufffe' # 0xA0 -> UNDEFINED + u'\ufffe' # 0xA1 -> UNDEFINED + u'\ufffe' # 0xA2 -> UNDEFINED + u'\ufffe' # 0xA3 -> UNDEFINED + u'\ufffe' # 0xA4 -> UNDEFINED + u'\ufffe' # 0xA5 -> UNDEFINED + u'\ufffe' # 0xA6 -> UNDEFINED + u'\ufffe' # 0xA7 -> UNDEFINED + u'\ufffe' # 0xA8 -> UNDEFINED + u'\xae' # 0xA9 -> REGISTERED SIGN + u'\xac' # 0xAA -> NOT SIGN + u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF + u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER + u'\ufffe' # 0xAD -> UNDEFINED + u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0xB0 -> LIGHT SHADE + u'\u2592' # 0xB1 -> MEDIUM SHADE + u'\u2593' # 0xB2 -> DARK SHADE + u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\ufffe' # 0xB5 -> UNDEFINED + u'\ufffe' # 0xB6 -> UNDEFINED + u'\ufffe' # 0xB7 -> UNDEFINED + u'\xa9' # 0xB8 -> COPYRIGHT SIGN + u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0xBD -> CENT SIGN + u'\xa5' # 0xBE -> YEN SIGN + u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0xC3 -> BOX DRAWINGS 
LIGHT VERTICAL AND RIGHT + u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\ufffe' # 0xC6 -> UNDEFINED + u'\ufffe' # 0xC7 -> UNDEFINED + u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0xCF -> CURRENCY SIGN + u'\ufffe' # 0xD0 -> UNDEFINED + u'\ufffe' # 0xD1 -> UNDEFINED + u'\ufffe' # 0xD2 -> UNDEFINED + u'\ufffe' # 0xD3 -> UNDEFINEDS + u'\ufffe' # 0xD4 -> UNDEFINED + u'\ufffe' # 0xD5 -> UNDEFINED + u'\ufffe' # 0xD6 -> UNDEFINEDE + u'\ufffe' # 0xD7 -> UNDEFINED + u'\ufffe' # 0xD8 -> UNDEFINED + u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0xDB -> FULL BLOCK + u'\u2584' # 0xDC -> LOWER HALF BLOCK + u'\xa6' # 0xDD -> BROKEN BAR + u'\ufffe' # 0xDE -> UNDEFINED + u'\u2580' # 0xDF -> UPPER HALF BLOCK + u'\ufffe' # 0xE0 -> UNDEFINED + u'\ufffe' # 0xE1 -> UNDEFINED + u'\ufffe' # 0xE2 -> UNDEFINED + u'\ufffe' # 0xE3 -> UNDEFINED + u'\ufffe' # 0xE4 -> UNDEFINED + u'\ufffe' # 0xE5 -> UNDEFINED + u'\xb5' # 0xE6 -> MICRO SIGN + u'\ufffe' # 0xE7 -> UNDEFINED + u'\ufffe' # 0xE8 -> UNDEFINED + u'\ufffe' # 0xE9 -> UNDEFINED + u'\ufffe' # 0xEA -> UNDEFINED + u'\ufffe' # 0xEB -> UNDEFINED + u'\ufffe' # 0xEC -> UNDEFINED + u'\ufffe' # 0xED -> UNDEFINED + u'\xaf' # 0xEE -> MACRON + u'\xb4' # 0xEF -> ACUTE ACCENT + u'\xad' # 0xF0 -> SOFT HYPHEN + u'\xb1' # 0xF1 -> PLUS-MINUS SIGN + u'\u2017' # 0xF2 -> DOUBLE LOW LINE + u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0xF4 -> PILCROW SIGN + u'\xa7' # 0xF5 -> SECTION SIGN + u'\xf7' # 0xF6 -> DIVISION SIGN + 
u'\xb8' # 0xF7 -> CEDILLA + u'\xb0' # 0xF8 -> DEGREE SIGN + u'\xa8' # 0xF9 -> DIAERESIS + u'\xb7' # 0xFA -> MIDDLE DOT + u'\xb9' # 0xFB -> SUPERSCRIPT ONE + u'\xb3' # 0xFC -> SUPERSCRIPT THREE + u'\xb2' # 0xFD -> SUPERSCRIPT TWO + u'\u25a0' # 0xFE -> BLACK SQUARE + u'\xa0' # 0xFF -> NO-BREAK SPACE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO 
@@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B 
@@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,97 +415,98 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xff, # NO-BREAK SPACE - 0x00a2: 0xbd, # CENT SIGN - 0x00a3: 0x9c, # POUND SIGN - 0x00a4: 0xcf, # CURRENCY SIGN - 0x00a5: 0xbe, # YEN SIGN - 0x00a6: 0xdd, # BROKEN BAR - 0x00a7: 0xf5, # SECTION SIGN - 0x00a8: 0xf9, # DIAERESIS - 0x00a9: 0xb8, # COPYRIGHT SIGN - 0x00ab: 0xae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xaa, # NOT SIGN - 0x00ad: 0xf0, # SOFT HYPHEN - 0x00ae: 0xa9, # REGISTERED SIGN - 0x00af: 0xee, # MACRON - 0x00b0: 0xf8, # DEGREE SIGN - 0x00b1: 0xf1, # PLUS-MINUS SIGN - 0x00b2: 0xfd, # SUPERSCRIPT TWO - 0x00b3: 0xfc, # SUPERSCRIPT THREE - 0x00b4: 0xef, # ACUTE ACCENT - 0x00b5: 0xe6, # MICRO SIGN - 0x00b6: 0xf4, # PILCROW SIGN - 0x00b7: 0xfa, # MIDDLE DOT - 0x00b8: 0xf7, # CEDILLA - 0x00b9: 0xfb, # SUPERSCRIPT ONE - 0x00bb: 0xaf, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xab, # VULGAR FRACTION ONE HALF - 0x00be: 0xf3, # VULGAR FRACTION THREE QUARTERS 
- 0x00d7: 0x9e, # MULTIPLICATION SIGN - 0x00f7: 0xf6, # DIVISION SIGN - 0x05d0: 0x80, # HEBREW LETTER ALEF - 0x05d1: 0x81, # HEBREW LETTER BET - 0x05d2: 0x82, # HEBREW LETTER GIMEL - 0x05d3: 0x83, # HEBREW LETTER DALET - 0x05d4: 0x84, # HEBREW LETTER HE - 0x05d5: 0x85, # HEBREW LETTER VAV - 0x05d6: 0x86, # HEBREW LETTER ZAYIN - 0x05d7: 0x87, # HEBREW LETTER HET - 0x05d8: 0x88, # HEBREW LETTER TET - 0x05d9: 0x89, # HEBREW LETTER YOD - 0x05da: 0x8a, # HEBREW LETTER FINAL KAF - 0x05db: 0x8b, # HEBREW LETTER KAF - 0x05dc: 0x8c, # HEBREW LETTER LAMED - 0x05dd: 0x8d, # HEBREW LETTER FINAL MEM - 0x05de: 0x8e, # HEBREW LETTER MEM - 0x05df: 0x8f, # HEBREW LETTER FINAL NUN - 0x05e0: 0x90, # HEBREW LETTER NUN - 0x05e1: 0x91, # HEBREW LETTER SAMEKH - 0x05e2: 0x92, # HEBREW LETTER AYIN - 0x05e3: 0x93, # HEBREW LETTER FINAL PE - 0x05e4: 0x94, # HEBREW LETTER PE - 0x05e5: 0x95, # HEBREW LETTER FINAL TSADI - 0x05e6: 0x96, # HEBREW LETTER TSADI - 0x05e7: 0x97, # HEBREW LETTER QOF - 0x05e8: 0x98, # HEBREW LETTER RESH - 0x05e9: 0x99, # HEBREW LETTER SHIN - 0x05ea: 0x9a, # HEBREW LETTER TAV - 0x2017: 0xf2, # DOUBLE LOW LINE - 0x2500: 0xc4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0xb3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0xda, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0xbf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0xc0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0xd9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0xc3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0xb4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0xc2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0xc1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0xc5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xcd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0xc9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0xbb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0xc8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0xbc, # BOX DRAWINGS DOUBLE UP AND 
LEFT - 0x2560: 0xcc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0xb9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0xcb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0xca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0xce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0xdf, # UPPER HALF BLOCK - 0x2584: 0xdc, # LOWER HALF BLOCK - 0x2588: 0xdb, # FULL BLOCK - 0x2591: 0xb0, # LIGHT SHADE - 0x2592: 0xb1, # MEDIUM SHADE - 0x2593: 0xb2, # DARK SHADE - 0x25a0: 0xfe, # BLACK SQUARE -} \ No newline at end of file + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xFF, # NO-BREAK SPACE + 0x00A2: 0xBD, # CENT SIGN + 0x00A3: 0x9C, # POUND SIGN + 0x00A4: 0xCF, # CURRENCY SIGN + 0x00A5: 0xBE, # YEN SIGN + 0x00A6: 0xDD, # BROKEN BAR + 0x00A7: 0xF5, # SECTION SIGN + 0x00A8: 0xF9, # DIAERESIS + 0x00A9: 0xB8, # COPYRIGHT SIGN + 0x00AB: 0xAE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAA, # NOT SIGN + 0x00AD: 0xF0, # SOFT HYPHEN + 0x00AE: 0xA9, # REGISTERED SIGN + 0x00AF: 0xEE, # MACRON + 0x00B0: 0xF8, # DEGREE SIGN + 0x00B1: 0xF1, # PLUS-MINUS SIGN + 0x00B2: 0xFD, # SUPERSCRIPT TWO + 0x00B3: 0xFC, # SUPERSCRIPT THREE + 0x00B4: 0xEF, # ACUTE ACCENT + 0x00B5: 0xE6, # MICRO SIGN + 0x00B6: 0xF4, # PILCROW SIGN + 0x00B7: 0xFA, # MIDDLE DOT + 0x00B8: 0xF7, # CEDILLA + 0x00B9: 0xFB, # SUPERSCRIPT ONE + 0x00BB: 0xAF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xAC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xAB, # VULGAR FRACTION ONE HALF + 0x00BE: 0xF3, # VULGAR FRACTION THREE QUARTERS + 0x00D7: 0x9E, # MULTIPLICATION SIGN + 0x00F7: 0xF6, # DIVISION SIGN + 0x05D0: 0x80, # HEBREW LETTER ALEF + 0x05D1: 0x81, # HEBREW LETTER BET + 0x05D2: 0x82, # HEBREW LETTER GIMEL + 0x05D3: 0x83, # HEBREW LETTER DALET + 0x05D4: 0x84, # HEBREW LETTER HE + 0x05D5: 0x85, # HEBREW LETTER VAV + 0x05D6: 
0x86, # HEBREW LETTER ZAYIN + 0x05D7: 0x87, # HEBREW LETTER HET + 0x05D8: 0x88, # HEBREW LETTER TET + 0x05D9: 0x89, # HEBREW LETTER YOD + 0x05DA: 0x8A, # HEBREW LETTER FINAL KAF + 0x05DB: 0x8B, # HEBREW LETTER KAF + 0x05DC: 0x8C, # HEBREW LETTER LAMED + 0x05DD: 0x8D, # HEBREW LETTER FINAL MEM + 0x05DE: 0x8E, # HEBREW LETTER MEM + 0x05DF: 0x8F, # HEBREW LETTER FINAL NUN + 0x05E0: 0x90, # HEBREW LETTER NUN + 0x05E1: 0x91, # HEBREW LETTER SAMEKH + 0x05E2: 0x92, # HEBREW LETTER AYIN + 0x05E3: 0x93, # HEBREW LETTER FINAL PE + 0x05E4: 0x94, # HEBREW LETTER PE + 0x05E5: 0x95, # HEBREW LETTER FINAL TSADI + 0x05E6: 0x96, # HEBREW LETTER TSADI + 0x05E7: 0x97, # HEBREW LETTER QOF + 0x05E8: 0x98, # HEBREW LETTER RESH + 0x05E9: 0x99, # HEBREW LETTER SHIN + 0x05EA: 0x9A, # HEBREW LETTER TAV + 0x2017: 0xF2, # DOUBLE LOW LINE + 0x2500: 0xC4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0xB3, # BOX DRAWINGS LIGHT VERTICAL + 0x250C: 0xDA, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0xBF, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0xC0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0xD9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251C: 0xC3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0xB4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252C: 0xC2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0xC1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253C: 0xC5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0xCD, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xBA, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0xC9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0xBB, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255A: 0xC8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255D: 0xBC, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0xCC, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0xB9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0xCB, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0xCA, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256C: 0xCE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 
0x2580: 0xDF, # UPPER HALF BLOCK + 0x2584: 0xDC, # LOWER HALF BLOCK + 0x2588: 0xDB, # FULL BLOCK + 0x2591: 0xB0, # LIGHT SHADE + 0x2592: 0xB1, # MEDIUM SHADE + 0x2593: 0xB2, # DARK SHADE + 0x25A0: 0xFE, # BLACK SQUARE +} + Index: cp874.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp874.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp874.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp874.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\ufffe' # 0x82 -> UNDEFINED @@ -170,12 +170,12 @@ u'\ufffe' # 0x87 -> UNDEFINED u'\ufffe' # 0x88 -> UNDEFINED u'\ufffe' # 0x89 -> UNDEFINED - u'\ufffe' # 0x8a -> UNDEFINED - u'\ufffe' # 0x8b -> UNDEFINED - u'\ufffe' # 0x8c -> UNDEFINED - u'\ufffe' # 0x8d -> UNDEFINED - u'\ufffe' # 0x8e -> UNDEFINED - u'\ufffe' # 0x8f -> UNDEFINED + u'\ufffe' # 0x8A -> UNDEFINED + u'\ufffe' # 0x8B -> UNDEFINED + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' 
# 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK @@ -186,108 +186,108 @@ u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\ufffe' # 0x99 -> UNDEFINED - u'\ufffe' # 0x9a -> UNDEFINED - u'\ufffe' # 0x9b -> UNDEFINED - u'\ufffe' # 0x9c -> UNDEFINED - u'\ufffe' # 0x9d -> UNDEFINED - u'\ufffe' # 0x9e -> UNDEFINED - u'\ufffe' # 0x9f -> UNDEFINED - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0e01' # 0xa1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xa2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xa3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xa4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xa5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xa6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xa7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xa8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xa9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xaa -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xab -> THAI CHARACTER SO SO - u'\u0e0c' # 0xac -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xad -> THAI CHARACTER YO YING - u'\u0e0e' # 0xae -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xaf -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xb0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xb1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xb2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xb3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xb4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xb5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xb6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xb7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xb8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xb9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xba -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xbb -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xbc -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xbd -> THAI CHARACTER FO FA - u'\u0e1e' # 0xbe -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xbf -> THAI CHARACTER FO FAN - u'\u0e20' # 0xc0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xc1 -> THAI CHARACTER MO 
MA - u'\u0e22' # 0xc2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xc3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xc4 -> THAI CHARACTER RU - u'\u0e25' # 0xc5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xc6 -> THAI CHARACTER LU - u'\u0e27' # 0xc7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xc8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xc9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xca -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xcb -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xcc -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xcd -> THAI CHARACTER O ANG - u'\u0e2e' # 0xce -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xcf -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xd0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xd1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xd2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xd3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xd4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xd5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xd6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xd7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xd8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xd9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xda -> THAI CHARACTER PHINTHU - u'\ufffe' # 0xdb -> UNDEFINED - u'\ufffe' # 0xdc -> UNDEFINED - u'\ufffe' # 0xdd -> UNDEFINED - u'\ufffe' # 0xde -> UNDEFINED - u'\u0e3f' # 0xdf -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xe0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xe1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xe2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xe3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xe4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xe5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xe6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xe7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xe8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xe9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xea -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xeb -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xec -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xed -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xee -> THAI CHARACTER 
YAMAKKAN - u'\u0e4f' # 0xef -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xf0 -> THAI DIGIT ZERO - u'\u0e51' # 0xf1 -> THAI DIGIT ONE - u'\u0e52' # 0xf2 -> THAI DIGIT TWO - u'\u0e53' # 0xf3 -> THAI DIGIT THREE - u'\u0e54' # 0xf4 -> THAI DIGIT FOUR - u'\u0e55' # 0xf5 -> THAI DIGIT FIVE - u'\u0e56' # 0xf6 -> THAI DIGIT SIX - u'\u0e57' # 0xf7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xf8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xf9 -> THAI DIGIT NINE - u'\u0e5a' # 0xfa -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xfb -> THAI CHARACTER KHOMUT - u'\ufffe' # 0xfc -> UNDEFINED - u'\ufffe' # 0xfd -> UNDEFINED - u'\ufffe' # 0xfe -> UNDEFINED - u'\ufffe' # 0xff -> UNDEFINED + u'\ufffe' # 0x9A -> UNDEFINED + u'\ufffe' # 0x9B -> UNDEFINED + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + 
u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 
0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT + u'\ufffe' # 0xFC -> UNDEFINED + u'\ufffe' # 0xFD -> UNDEFINED + u'\ufffe' # 0xFE -> UNDEFINED + u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # 
ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 
0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,107 +415,108 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x0e01: 0xa1, # THAI CHARACTER KO KAI - 0x0e02: 0xa2, # THAI CHARACTER KHO KHAI - 0x0e03: 0xa3, # THAI CHARACTER KHO KHUAT - 0x0e04: 0xa4, # THAI CHARACTER KHO KHWAI - 
0x0e05: 0xa5, # THAI CHARACTER KHO KHON - 0x0e06: 0xa6, # THAI CHARACTER KHO RAKHANG - 0x0e07: 0xa7, # THAI CHARACTER NGO NGU - 0x0e08: 0xa8, # THAI CHARACTER CHO CHAN - 0x0e09: 0xa9, # THAI CHARACTER CHO CHING - 0x0e0a: 0xaa, # THAI CHARACTER CHO CHANG - 0x0e0b: 0xab, # THAI CHARACTER SO SO - 0x0e0c: 0xac, # THAI CHARACTER CHO CHOE - 0x0e0d: 0xad, # THAI CHARACTER YO YING - 0x0e0e: 0xae, # THAI CHARACTER DO CHADA - 0x0e0f: 0xaf, # THAI CHARACTER TO PATAK - 0x0e10: 0xb0, # THAI CHARACTER THO THAN - 0x0e11: 0xb1, # THAI CHARACTER THO NANGMONTHO - 0x0e12: 0xb2, # THAI CHARACTER THO PHUTHAO - 0x0e13: 0xb3, # THAI CHARACTER NO NEN - 0x0e14: 0xb4, # THAI CHARACTER DO DEK - 0x0e15: 0xb5, # THAI CHARACTER TO TAO - 0x0e16: 0xb6, # THAI CHARACTER THO THUNG - 0x0e17: 0xb7, # THAI CHARACTER THO THAHAN - 0x0e18: 0xb8, # THAI CHARACTER THO THONG - 0x0e19: 0xb9, # THAI CHARACTER NO NU - 0x0e1a: 0xba, # THAI CHARACTER BO BAIMAI - 0x0e1b: 0xbb, # THAI CHARACTER PO PLA - 0x0e1c: 0xbc, # THAI CHARACTER PHO PHUNG - 0x0e1d: 0xbd, # THAI CHARACTER FO FA - 0x0e1e: 0xbe, # THAI CHARACTER PHO PHAN - 0x0e1f: 0xbf, # THAI CHARACTER FO FAN - 0x0e20: 0xc0, # THAI CHARACTER PHO SAMPHAO - 0x0e21: 0xc1, # THAI CHARACTER MO MA - 0x0e22: 0xc2, # THAI CHARACTER YO YAK - 0x0e23: 0xc3, # THAI CHARACTER RO RUA - 0x0e24: 0xc4, # THAI CHARACTER RU - 0x0e25: 0xc5, # THAI CHARACTER LO LING - 0x0e26: 0xc6, # THAI CHARACTER LU - 0x0e27: 0xc7, # THAI CHARACTER WO WAEN - 0x0e28: 0xc8, # THAI CHARACTER SO SALA - 0x0e29: 0xc9, # THAI CHARACTER SO RUSI - 0x0e2a: 0xca, # THAI CHARACTER SO SUA - 0x0e2b: 0xcb, # THAI CHARACTER HO HIP - 0x0e2c: 0xcc, # THAI CHARACTER LO CHULA - 0x0e2d: 0xcd, # THAI CHARACTER O ANG - 0x0e2e: 0xce, # THAI CHARACTER HO NOKHUK - 0x0e2f: 0xcf, # THAI CHARACTER PAIYANNOI - 0x0e30: 0xd0, # THAI CHARACTER SARA A - 0x0e31: 0xd1, # THAI CHARACTER MAI HAN-AKAT - 0x0e32: 0xd2, # THAI CHARACTER SARA AA - 0x0e33: 0xd3, # THAI CHARACTER SARA AM - 0x0e34: 0xd4, # THAI CHARACTER SARA I - 0x0e35: 
0xd5, # THAI CHARACTER SARA II - 0x0e36: 0xd6, # THAI CHARACTER SARA UE - 0x0e37: 0xd7, # THAI CHARACTER SARA UEE - 0x0e38: 0xd8, # THAI CHARACTER SARA U - 0x0e39: 0xd9, # THAI CHARACTER SARA UU - 0x0e3a: 0xda, # THAI CHARACTER PHINTHU - 0x0e3f: 0xdf, # THAI CURRENCY SYMBOL BAHT - 0x0e40: 0xe0, # THAI CHARACTER SARA E - 0x0e41: 0xe1, # THAI CHARACTER SARA AE - 0x0e42: 0xe2, # THAI CHARACTER SARA O - 0x0e43: 0xe3, # THAI CHARACTER SARA AI MAIMUAN - 0x0e44: 0xe4, # THAI CHARACTER SARA AI MAIMALAI - 0x0e45: 0xe5, # THAI CHARACTER LAKKHANGYAO - 0x0e46: 0xe6, # THAI CHARACTER MAIYAMOK - 0x0e47: 0xe7, # THAI CHARACTER MAITAIKHU - 0x0e48: 0xe8, # THAI CHARACTER MAI EK - 0x0e49: 0xe9, # THAI CHARACTER MAI THO - 0x0e4a: 0xea, # THAI CHARACTER MAI TRI - 0x0e4b: 0xeb, # THAI CHARACTER MAI CHATTAWA - 0x0e4c: 0xec, # THAI CHARACTER THANTHAKHAT - 0x0e4d: 0xed, # THAI CHARACTER NIKHAHIT - 0x0e4e: 0xee, # THAI CHARACTER YAMAKKAN - 0x0e4f: 0xef, # THAI CHARACTER FONGMAN - 0x0e50: 0xf0, # THAI DIGIT ZERO - 0x0e51: 0xf1, # THAI DIGIT ONE - 0x0e52: 0xf2, # THAI DIGIT TWO - 0x0e53: 0xf3, # THAI DIGIT THREE - 0x0e54: 0xf4, # THAI DIGIT FOUR - 0x0e55: 0xf5, # THAI DIGIT FIVE - 0x0e56: 0xf6, # THAI DIGIT SIX - 0x0e57: 0xf7, # THAI DIGIT SEVEN - 0x0e58: 0xf8, # THAI DIGIT EIGHT - 0x0e59: 0xf9, # THAI DIGIT NINE - 0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU - 0x0e5b: 0xfb, # THAI CHARACTER KHOMUT + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x0E01: 0xA1, # THAI CHARACTER KO KAI + 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI + 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT + 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI + 0x0E05: 0xA5, # THAI CHARACTER KHO KHON + 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG + 0x0E07: 0xA7, # THAI CHARACTER NGO NGU + 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN + 0x0E09: 0xA9, # THAI CHARACTER CHO 
CHING + 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG + 0x0E0B: 0xAB, # THAI CHARACTER SO SO + 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE + 0x0E0D: 0xAD, # THAI CHARACTER YO YING + 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA + 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK + 0x0E10: 0xB0, # THAI CHARACTER THO THAN + 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO + 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO + 0x0E13: 0xB3, # THAI CHARACTER NO NEN + 0x0E14: 0xB4, # THAI CHARACTER DO DEK + 0x0E15: 0xB5, # THAI CHARACTER TO TAO + 0x0E16: 0xB6, # THAI CHARACTER THO THUNG + 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN + 0x0E18: 0xB8, # THAI CHARACTER THO THONG + 0x0E19: 0xB9, # THAI CHARACTER NO NU + 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI + 0x0E1B: 0xBB, # THAI CHARACTER PO PLA + 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG + 0x0E1D: 0xBD, # THAI CHARACTER FO FA + 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN + 0x0E1F: 0xBF, # THAI CHARACTER FO FAN + 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO + 0x0E21: 0xC1, # THAI CHARACTER MO MA + 0x0E22: 0xC2, # THAI CHARACTER YO YAK + 0x0E23: 0xC3, # THAI CHARACTER RO RUA + 0x0E24: 0xC4, # THAI CHARACTER RU + 0x0E25: 0xC5, # THAI CHARACTER LO LING + 0x0E26: 0xC6, # THAI CHARACTER LU + 0x0E27: 0xC7, # THAI CHARACTER WO WAEN + 0x0E28: 0xC8, # THAI CHARACTER SO SALA + 0x0E29: 0xC9, # THAI CHARACTER SO RUSI + 0x0E2A: 0xCA, # THAI CHARACTER SO SUA + 0x0E2B: 0xCB, # THAI CHARACTER HO HIP + 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA + 0x0E2D: 0xCD, # THAI CHARACTER O ANG + 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK + 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI + 0x0E30: 0xD0, # THAI CHARACTER SARA A + 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT + 0x0E32: 0xD2, # THAI CHARACTER SARA AA + 0x0E33: 0xD3, # THAI CHARACTER SARA AM + 0x0E34: 0xD4, # THAI CHARACTER SARA I + 0x0E35: 0xD5, # THAI CHARACTER SARA II + 0x0E36: 0xD6, # THAI CHARACTER SARA UE + 0x0E37: 0xD7, # THAI CHARACTER SARA UEE + 0x0E38: 0xD8, # THAI CHARACTER SARA U + 0x0E39: 0xD9, # THAI CHARACTER SARA UU + 0x0E3A: 
0xDA, # THAI CHARACTER PHINTHU + 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT + 0x0E40: 0xE0, # THAI CHARACTER SARA E + 0x0E41: 0xE1, # THAI CHARACTER SARA AE + 0x0E42: 0xE2, # THAI CHARACTER SARA O + 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN + 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI + 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO + 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK + 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU + 0x0E48: 0xE8, # THAI CHARACTER MAI EK + 0x0E49: 0xE9, # THAI CHARACTER MAI THO + 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI + 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA + 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT + 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT + 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN + 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN + 0x0E50: 0xF0, # THAI DIGIT ZERO + 0x0E51: 0xF1, # THAI DIGIT ONE + 0x0E52: 0xF2, # THAI DIGIT TWO + 0x0E53: 0xF3, # THAI DIGIT THREE + 0x0E54: 0xF4, # THAI DIGIT FOUR + 0x0E55: 0xF5, # THAI DIGIT FIVE + 0x0E56: 0xF6, # THAI DIGIT SIX + 0x0E57: 0xF7, # THAI DIGIT SEVEN + 0x0E58: 0xF8, # THAI DIGIT EIGHT + 0x0E59: 0xF9, # THAI DIGIT NINE + 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU + 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT 0x2013: 0x96, # EN DASH 0x2014: 0x97, # EM DASH 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201c: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x94, # RIGHT DOUBLE QUOTATION MARK + 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK 0x2022: 0x95, # BULLET 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x20ac: 0x80, # EURO SIGN -} \ No newline at end of file + 0x20AC: 0x80, # EURO SIGN +} + Index: cp875.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/cp875.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- cp875.py 24 Oct 2005 12:07:48 -0000 1.6 +++ cp875.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x7f' # 0x07 
-> DELETE u'\x97' # 0x08 -> CONTROL u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0a -> CONTROL - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x87' # 0x17 -> CONTROL u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1a -> CONTROL - u'\x8f' # 0x1b -> CONTROL - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> CONTROL u'\x81' # 0x21 -> CONTROL u'\x82' # 0x22 -> CONTROL @@ -74,12 +74,12 @@ u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> CONTROL u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2a -> CONTROL - u'\x8b' # 0x2b -> CONTROL - u'\x8c' # 0x2c -> CONTROL - u'\x05' # 0x2d -> ENQUIRY - u'\x06' # 0x2e -> ACKNOWLEDGE - u'\x07' # 0x2f -> BELL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> CONTROL u'\x91' # 0x31 -> CONTROL u'\x16' # 0x32 -> SYNCHRONOUS IDLE @@ -90,12 +90,12 @@ u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> CONTROL u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3a -> CONTROL - u'\x9b' # 0x3b -> CONTROL - u'\x14' # 0x3c -> DEVICE CONTROL FOUR - u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3e -> CONTROL - u'\x1a' # 0x3f -> SUBSTITUTE + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 
0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA u'\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA @@ -106,12 +106,12 @@ u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA - u'[' # 0x4a -> LEFT SQUARE BRACKET - u'.' # 0x4b -> FULL STOP - u'<' # 0x4c -> LESS-THAN SIGN - u'(' # 0x4d -> LEFT PARENTHESIS - u'+' # 0x4e -> PLUS SIGN - u'!' # 0x4f -> EXCLAMATION MARK + u'[' # 0x4A -> LEFT SQUARE BRACKET + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' # 0x4F -> EXCLAMATION MARK u'&' # 0x50 -> AMPERSAND u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA @@ -122,12 +122,12 @@ u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA - u']' # 0x5a -> RIGHT SQUARE BRACKET - u'$' # 0x5b -> DOLLAR SIGN - u'*' # 0x5c -> ASTERISK - u')' # 0x5d -> RIGHT PARENTHESIS - u';' # 0x5e -> SEMICOLON - u'^' # 0x5f -> CIRCUMFLEX ACCENT + u']' # 0x5A -> RIGHT SQUARE BRACKET + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU @@ -138,12 +138,12 @@ u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'|' # 0x6a -> VERTICAL LINE - u',' # 0x6b -> COMMA - u'%' # 0x6c -> PERCENT SIGN - u'_' # 0x6d -> LOW LINE - u'>' # 0x6e -> GREATER-THAN SIGN - u'?' 
# 0x6f -> QUESTION MARK + u'|' # 0x6A -> VERTICAL LINE + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK u'\xa8' # 0x70 -> DIAERESIS u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS @@ -154,12 +154,12 @@ u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7a -> COLON - u'#' # 0x7b -> NUMBER SIGN - u'@' # 0x7c -> COMMERCIAL AT - u"'" # 0x7d -> APOSTROPHE - u'=' # 0x7e -> EQUALS SIGN - u'"' # 0x7f -> QUOTATION MARK + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B @@ -170,12 +170,12 @@ u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I - u'\u03b1' # 0x8a -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x8b -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x8c -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0x8d -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x8e -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0x8f -> GREEK SMALL LETTER ZETA + u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K @@ -186,108 +186,108 @@ u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER R - u'\u03b7' # 0x9a -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x9b -> GREEK SMALL LETTER THETA - 
u'\u03b9' # 0x9c -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x9d -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x9e -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x9f -> GREEK SMALL LETTER MU - u'\xb4' # 0xa0 -> ACUTE ACCENT - u'~' # 0xa1 -> TILDE - u's' # 0xa2 -> LATIN SMALL LETTER S - u't' # 0xa3 -> LATIN SMALL LETTER T - u'u' # 0xa4 -> LATIN SMALL LETTER U - u'v' # 0xa5 -> LATIN SMALL LETTER V - u'w' # 0xa6 -> LATIN SMALL LETTER W - u'x' # 0xa7 -> LATIN SMALL LETTER X - u'y' # 0xa8 -> LATIN SMALL LETTER Y - u'z' # 0xa9 -> LATIN SMALL LETTER Z - u'\u03bd' # 0xaa -> GREEK SMALL LETTER NU - u'\u03be' # 0xab -> GREEK SMALL LETTER XI - u'\u03bf' # 0xac -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xad -> GREEK SMALL LETTER PI - u'\u03c1' # 0xae -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xaf -> GREEK SMALL LETTER SIGMA - u'\xa3' # 0xb0 -> POUND SIGN - u'\u03ac' # 0xb1 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xb2 -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xb3 -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03ca' # 0xb4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03af' # 0xb5 -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xb6 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xb7 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03cb' # 0xb8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03ce' # 0xb9 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c2' # 0xba -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0xbb -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xbc -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xbd -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xbe -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xbf -> GREEK SMALL LETTER PSI - u'{' # 0xc0 -> LEFT CURLY BRACKET - u'A' # 0xc1 -> LATIN CAPITAL LETTER A - u'B' # 0xc2 -> LATIN CAPITAL LETTER B - u'C' # 0xc3 -> LATIN CAPITAL LETTER C - u'D' # 0xc4 -> LATIN CAPITAL LETTER D - u'E' # 0xc5 -> LATIN CAPITAL LETTER E - u'F' # 0xc6 -> LATIN CAPITAL LETTER F - u'G' # 0xc7 -> LATIN CAPITAL LETTER G - u'H' # 0xc8 -> 
LATIN CAPITAL LETTER H - u'I' # 0xc9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xca -> SOFT HYPHEN - u'\u03c9' # 0xcb -> GREEK SMALL LETTER OMEGA - u'\u0390' # 0xcc -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xcd -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u2018' # 0xce -> LEFT SINGLE QUOTATION MARK - u'\u2015' # 0xcf -> HORIZONTAL BAR - u'}' # 0xd0 -> RIGHT CURLY BRACKET - u'J' # 0xd1 -> LATIN CAPITAL LETTER J - u'K' # 0xd2 -> LATIN CAPITAL LETTER K - u'L' # 0xd3 -> LATIN CAPITAL LETTER L - u'M' # 0xd4 -> LATIN CAPITAL LETTER M - u'N' # 0xd5 -> LATIN CAPITAL LETTER N - u'O' # 0xd6 -> LATIN CAPITAL LETTER O - u'P' # 0xd7 -> LATIN CAPITAL LETTER P - u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q - u'R' # 0xd9 -> LATIN CAPITAL LETTER R - u'\xb1' # 0xda -> PLUS-MINUS SIGN - u'\xbd' # 0xdb -> VULGAR FRACTION ONE HALF - u'\x1a' # 0xdc -> SUBSTITUTE - u'\u0387' # 0xdd -> GREEK ANO TELEIA - u'\u2019' # 0xde -> RIGHT SINGLE QUOTATION MARK - u'\xa6' # 0xdf -> BROKEN BAR - u'\\' # 0xe0 -> REVERSE SOLIDUS - u'\x1a' # 0xe1 -> SUBSTITUTE - u'S' # 0xe2 -> LATIN CAPITAL LETTER S - u'T' # 0xe3 -> LATIN CAPITAL LETTER T - u'U' # 0xe4 -> LATIN CAPITAL LETTER U - u'V' # 0xe5 -> LATIN CAPITAL LETTER V - u'W' # 0xe6 -> LATIN CAPITAL LETTER W - u'X' # 0xe7 -> LATIN CAPITAL LETTER X - u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xea -> SUPERSCRIPT TWO - u'\xa7' # 0xeb -> SECTION SIGN - u'\x1a' # 0xec -> SUBSTITUTE - u'\x1a' # 0xed -> SUBSTITUTE - u'\xab' # 0xee -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xef -> NOT SIGN - u'0' # 0xf0 -> DIGIT ZERO - u'1' # 0xf1 -> DIGIT ONE - u'2' # 0xf2 -> DIGIT TWO - u'3' # 0xf3 -> DIGIT THREE - u'4' # 0xf4 -> DIGIT FOUR - u'5' # 0xf5 -> DIGIT FIVE - u'6' # 0xf6 -> DIGIT SIX - u'7' # 0xf7 -> DIGIT SEVEN - u'8' # 0xf8 -> DIGIT EIGHT - u'9' # 0xf9 -> DIGIT NINE - u'\xb3' # 0xfa -> SUPERSCRIPT THREE - u'\xa9' # 0xfb -> COPYRIGHT SIGN - u'\x1a' # 0xfc -> SUBSTITUTE - 
u'\x1a' # 0xfd -> SUBSTITUTE - u'\xbb' # 0xfe -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\x9f' # 0xff -> CONTROL + u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x9F -> GREEK SMALL LETTER MU + u'\xb4' # 0xA0 -> ACUTE ACCENT + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU + u'\u03be' # 0xAB -> GREEK SMALL LETTER XI + u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI + u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO + u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA + u'\xa3' # 0xB0 -> POUND SIGN + u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' 
# 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA + u'\u0390' # 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK + u'\u2015' # 0xCF -> HORIZONTAL BAR + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb1' # 0xDA -> PLUS-MINUS SIGN + u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF + u'\x1a' # 0xDC -> SUBSTITUTE + u'\u0387' # 0xDD -> GREEK ANO TELEIA + u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK + u'\xa6' # 0xDF -> BROKEN BAR + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\x1a' # 0xE1 -> SUBSTITUTE + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xa7' # 0xEB -> SECTION SIGN + u'\x1a' # 0xEC -> SUBSTITUTE + u'\x1a' # 0xED -> SUBSTITUTE + u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xEF -> NOT SIGN + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 
-> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xa9' # 0xFB -> COPYRIGHT SIGN + u'\x1a' # 0xFC -> SUBSTITUTE + u'\x1a' # 0xFD -> SUBSTITUTE + u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\x9f' # 0xFF -> CONTROL ) ### Encoding Map @@ -298,97 +298,97 @@ 0x0002: 0x02, # START OF TEXT 0x0003: 0x03, # END OF TEXT 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2d, # ENQUIRY - 0x0006: 0x2e, # ACKNOWLEDGE - 0x0007: 0x2f, # BELL + 0x0005: 0x2D, # ENQUIRY + 0x0006: 0x2E, # ACKNOWLEDGE + 0x0007: 0x2F, # BELL 0x0008: 0x16, # BACKSPACE 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000a: 0x25, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x25, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3c, # DEVICE CONTROL FOUR - 0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE + 0x0014: 0x3C, # DEVICE CONTROL FOUR + 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE 0x0016: 0x32, # SYNCHRONOUS IDLE 0x0017: 0x26, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: None, # SUBSTITUTE - 0x001b: 0x27, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: None, # SUBSTITUTE + 0x001B: 0x27, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x40, # SPACE - 0x0021: 0x4f, # EXCLAMATION MARK - 0x0022: 0x7f, # QUOTATION MARK - 0x0023: 0x7b, # NUMBER SIGN - 0x0024: 0x5b, # DOLLAR SIGN - 
0x0025: 0x6c, # PERCENT SIGN + 0x0021: 0x4F, # EXCLAMATION MARK + 0x0022: 0x7F, # QUOTATION MARK + 0x0023: 0x7B, # NUMBER SIGN + 0x0024: 0x5B, # DOLLAR SIGN + 0x0025: 0x6C, # PERCENT SIGN 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7d, # APOSTROPHE - 0x0028: 0x4d, # LEFT PARENTHESIS - 0x0029: 0x5d, # RIGHT PARENTHESIS - 0x002a: 0x5c, # ASTERISK - 0x002b: 0x4e, # PLUS SIGN - 0x002c: 0x6b, # COMMA - 0x002d: 0x60, # HYPHEN-MINUS - 0x002e: 0x4b, # FULL STOP - 0x002f: 0x61, # SOLIDUS - 0x0030: 0xf0, # DIGIT ZERO - 0x0031: 0xf1, # DIGIT ONE - 0x0032: 0xf2, # DIGIT TWO - 0x0033: 0xf3, # DIGIT THREE - 0x0034: 0xf4, # DIGIT FOUR - 0x0035: 0xf5, # DIGIT FIVE - 0x0036: 0xf6, # DIGIT SIX - 0x0037: 0xf7, # DIGIT SEVEN - 0x0038: 0xf8, # DIGIT EIGHT - 0x0039: 0xf9, # DIGIT NINE - 0x003a: 0x7a, # COLON - 0x003b: 0x5e, # SEMICOLON - 0x003c: 0x4c, # LESS-THAN SIGN - 0x003d: 0x7e, # EQUALS SIGN - 0x003e: 0x6e, # GREATER-THAN SIGN - 0x003f: 0x6f, # QUESTION MARK - 0x0040: 0x7c, # COMMERCIAL AT - 0x0041: 0xc1, # LATIN CAPITAL LETTER A - 0x0042: 0xc2, # LATIN CAPITAL LETTER B - 0x0043: 0xc3, # LATIN CAPITAL LETTER C - 0x0044: 0xc4, # LATIN CAPITAL LETTER D - 0x0045: 0xc5, # LATIN CAPITAL LETTER E - 0x0046: 0xc6, # LATIN CAPITAL LETTER F - 0x0047: 0xc7, # LATIN CAPITAL LETTER G - 0x0048: 0xc8, # LATIN CAPITAL LETTER H - 0x0049: 0xc9, # LATIN CAPITAL LETTER I - 0x004a: 0xd1, # LATIN CAPITAL LETTER J - 0x004b: 0xd2, # LATIN CAPITAL LETTER K - 0x004c: 0xd3, # LATIN CAPITAL LETTER L - 0x004d: 0xd4, # LATIN CAPITAL LETTER M - 0x004e: 0xd5, # LATIN CAPITAL LETTER N - 0x004f: 0xd6, # LATIN CAPITAL LETTER O - 0x0050: 0xd7, # LATIN CAPITAL LETTER P - 0x0051: 0xd8, # LATIN CAPITAL LETTER Q - 0x0052: 0xd9, # LATIN CAPITAL LETTER R - 0x0053: 0xe2, # LATIN CAPITAL LETTER S - 0x0054: 0xe3, # LATIN CAPITAL LETTER T - 0x0055: 0xe4, # LATIN CAPITAL LETTER U - 0x0056: 0xe5, # LATIN CAPITAL LETTER V - 0x0057: 0xe6, # LATIN CAPITAL LETTER W - 0x0058: 0xe7, # LATIN CAPITAL LETTER X - 0x0059: 0xe8, # LATIN CAPITAL 
LETTER Y - 0x005a: 0xe9, # LATIN CAPITAL LETTER Z - 0x005b: 0x4a, # LEFT SQUARE BRACKET - 0x005c: 0xe0, # REVERSE SOLIDUS - 0x005d: 0x5a, # RIGHT SQUARE BRACKET - 0x005e: 0x5f, # CIRCUMFLEX ACCENT - 0x005f: 0x6d, # LOW LINE + 0x0027: 0x7D, # APOSTROPHE + 0x0028: 0x4D, # LEFT PARENTHESIS + 0x0029: 0x5D, # RIGHT PARENTHESIS + 0x002A: 0x5C, # ASTERISK + 0x002B: 0x4E, # PLUS SIGN + 0x002C: 0x6B, # COMMA + 0x002D: 0x60, # HYPHEN-MINUS + 0x002E: 0x4B, # FULL STOP + 0x002F: 0x61, # SOLIDUS + 0x0030: 0xF0, # DIGIT ZERO + 0x0031: 0xF1, # DIGIT ONE + 0x0032: 0xF2, # DIGIT TWO + 0x0033: 0xF3, # DIGIT THREE + 0x0034: 0xF4, # DIGIT FOUR + 0x0035: 0xF5, # DIGIT FIVE + 0x0036: 0xF6, # DIGIT SIX + 0x0037: 0xF7, # DIGIT SEVEN + 0x0038: 0xF8, # DIGIT EIGHT + 0x0039: 0xF9, # DIGIT NINE + 0x003A: 0x7A, # COLON + 0x003B: 0x5E, # SEMICOLON + 0x003C: 0x4C, # LESS-THAN SIGN + 0x003D: 0x7E, # EQUALS SIGN + 0x003E: 0x6E, # GREATER-THAN SIGN + 0x003F: 0x6F, # QUESTION MARK + 0x0040: 0x7C, # COMMERCIAL AT + 0x0041: 0xC1, # LATIN CAPITAL LETTER A + 0x0042: 0xC2, # LATIN CAPITAL LETTER B + 0x0043: 0xC3, # LATIN CAPITAL LETTER C + 0x0044: 0xC4, # LATIN CAPITAL LETTER D + 0x0045: 0xC5, # LATIN CAPITAL LETTER E + 0x0046: 0xC6, # LATIN CAPITAL LETTER F + 0x0047: 0xC7, # LATIN CAPITAL LETTER G + 0x0048: 0xC8, # LATIN CAPITAL LETTER H + 0x0049: 0xC9, # LATIN CAPITAL LETTER I + 0x004A: 0xD1, # LATIN CAPITAL LETTER J + 0x004B: 0xD2, # LATIN CAPITAL LETTER K + 0x004C: 0xD3, # LATIN CAPITAL LETTER L + 0x004D: 0xD4, # LATIN CAPITAL LETTER M + 0x004E: 0xD5, # LATIN CAPITAL LETTER N + 0x004F: 0xD6, # LATIN CAPITAL LETTER O + 0x0050: 0xD7, # LATIN CAPITAL LETTER P + 0x0051: 0xD8, # LATIN CAPITAL LETTER Q + 0x0052: 0xD9, # LATIN CAPITAL LETTER R + 0x0053: 0xE2, # LATIN CAPITAL LETTER S + 0x0054: 0xE3, # LATIN CAPITAL LETTER T + 0x0055: 0xE4, # LATIN CAPITAL LETTER U + 0x0056: 0xE5, # LATIN CAPITAL LETTER V + 0x0057: 0xE6, # LATIN CAPITAL LETTER W + 0x0058: 0xE7, # LATIN CAPITAL LETTER X + 0x0059: 0xE8, # 
LATIN CAPITAL LETTER Y + 0x005A: 0xE9, # LATIN CAPITAL LETTER Z + 0x005B: 0x4A, # LEFT SQUARE BRACKET + 0x005C: 0xE0, # REVERSE SOLIDUS + 0x005D: 0x5A, # RIGHT SQUARE BRACKET + 0x005E: 0x5F, # CIRCUMFLEX ACCENT + 0x005F: 0x6D, # LOW LINE 0x0060: 0x79, # GRAVE ACCENT 0x0061: 0x81, # LATIN SMALL LETTER A 0x0062: 0x82, # LATIN SMALL LETTER B @@ -399,28 +399,28 @@ 0x0067: 0x87, # LATIN SMALL LETTER G 0x0068: 0x88, # LATIN SMALL LETTER H 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006a: 0x91, # LATIN SMALL LETTER J - 0x006b: 0x92, # LATIN SMALL LETTER K - 0x006c: 0x93, # LATIN SMALL LETTER L - 0x006d: 0x94, # LATIN SMALL LETTER M - 0x006e: 0x95, # LATIN SMALL LETTER N - 0x006f: 0x96, # LATIN SMALL LETTER O + 0x006A: 0x91, # LATIN SMALL LETTER J + 0x006B: 0x92, # LATIN SMALL LETTER K + 0x006C: 0x93, # LATIN SMALL LETTER L + 0x006D: 0x94, # LATIN SMALL LETTER M + 0x006E: 0x95, # LATIN SMALL LETTER N + 0x006F: 0x96, # LATIN SMALL LETTER O 0x0070: 0x97, # LATIN SMALL LETTER P 0x0071: 0x98, # LATIN SMALL LETTER Q 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xa2, # LATIN SMALL LETTER S - 0x0074: 0xa3, # LATIN SMALL LETTER T - 0x0075: 0xa4, # LATIN SMALL LETTER U - 0x0076: 0xa5, # LATIN SMALL LETTER V - 0x0077: 0xa6, # LATIN SMALL LETTER W - 0x0078: 0xa7, # LATIN SMALL LETTER X - 0x0079: 0xa8, # LATIN SMALL LETTER Y - 0x007a: 0xa9, # LATIN SMALL LETTER Z - 0x007b: 0xc0, # LEFT CURLY BRACKET - 0x007c: 0x6a, # VERTICAL LINE - 0x007d: 0xd0, # RIGHT CURLY BRACKET - 0x007e: 0xa1, # TILDE - 0x007f: 0x07, # DELETE + 0x0073: 0xA2, # LATIN SMALL LETTER S + 0x0074: 0xA3, # LATIN SMALL LETTER T + 0x0075: 0xA4, # LATIN SMALL LETTER U + 0x0076: 0xA5, # LATIN SMALL LETTER V + 0x0077: 0xA6, # LATIN SMALL LETTER W + 0x0078: 0xA7, # LATIN SMALL LETTER X + 0x0079: 0xA8, # LATIN SMALL LETTER Y + 0x007A: 0xA9, # LATIN SMALL LETTER Z + 0x007B: 0xC0, # LEFT CURLY BRACKET + 0x007C: 0x6A, # VERTICAL LINE + 0x007D: 0xD0, # RIGHT CURLY BRACKET + 0x007E: 0xA1, # TILDE + 0x007F: 0x07, # DELETE 0x0080: 
0x20, # CONTROL 0x0081: 0x21, # CONTROL 0x0082: 0x22, # CONTROL @@ -431,15 +431,15 @@ 0x0087: 0x17, # CONTROL 0x0088: 0x28, # CONTROL 0x0089: 0x29, # CONTROL - 0x008a: 0x2a, # CONTROL - 0x008b: 0x2b, # CONTROL - 0x008c: 0x2c, # CONTROL - 0x008d: 0x09, # CONTROL - 0x008e: 0x0a, # CONTROL - 0x008f: 0x1b, # CONTROL + 0x008A: 0x2A, # CONTROL + 0x008B: 0x2B, # CONTROL + 0x008C: 0x2C, # CONTROL + 0x008D: 0x09, # CONTROL + 0x008E: 0x0A, # CONTROL + 0x008F: 0x1B, # CONTROL 0x0090: 0x30, # CONTROL 0x0091: 0x31, # CONTROL - 0x0092: 0x1a, # CONTROL + 0x0092: 0x1A, # CONTROL 0x0093: 0x33, # CONTROL 0x0094: 0x34, # CONTROL 0x0095: 0x35, # CONTROL @@ -447,38 +447,38 @@ 0x0097: 0x08, # CONTROL 0x0098: 0x38, # CONTROL 0x0099: 0x39, # CONTROL - 0x009a: 0x3a, # CONTROL - 0x009b: 0x3b, # CONTROL - 0x009c: 0x04, # CONTROL - 0x009d: 0x14, # CONTROL - 0x009e: 0x3e, # CONTROL - 0x009f: 0xff, # CONTROL - 0x00a0: 0x74, # NO-BREAK SPACE - 0x00a3: 0xb0, # POUND SIGN - 0x00a6: 0xdf, # BROKEN BAR - 0x00a7: 0xeb, # SECTION SIGN - 0x00a8: 0x70, # DIAERESIS - 0x00a9: 0xfb, # COPYRIGHT SIGN - 0x00ab: 0xee, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xef, # NOT SIGN - 0x00ad: 0xca, # SOFT HYPHEN - 0x00b0: 0x90, # DEGREE SIGN - 0x00b1: 0xda, # PLUS-MINUS SIGN - 0x00b2: 0xea, # SUPERSCRIPT TWO - 0x00b3: 0xfa, # SUPERSCRIPT THREE - 0x00b4: 0xa0, # ACUTE ACCENT - 0x00bb: 0xfe, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bd: 0xdb, # VULGAR FRACTION ONE HALF + 0x009A: 0x3A, # CONTROL + 0x009B: 0x3B, # CONTROL + 0x009C: 0x04, # CONTROL + 0x009D: 0x14, # CONTROL + 0x009E: 0x3E, # CONTROL + 0x009F: 0xFF, # CONTROL + 0x00A0: 0x74, # NO-BREAK SPACE + 0x00A3: 0xB0, # POUND SIGN + 0x00A6: 0xDF, # BROKEN BAR + 0x00A7: 0xEB, # SECTION SIGN + 0x00A8: 0x70, # DIAERESIS + 0x00A9: 0xFB, # COPYRIGHT SIGN + 0x00AB: 0xEE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xEF, # NOT SIGN + 0x00AD: 0xCA, # SOFT HYPHEN + 0x00B0: 0x90, # DEGREE SIGN + 0x00B1: 0xDA, # PLUS-MINUS SIGN + 0x00B2: 0xEA, # 
SUPERSCRIPT TWO + 0x00B3: 0xFA, # SUPERSCRIPT THREE + 0x00B4: 0xA0, # ACUTE ACCENT + 0x00BB: 0xFE, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0xDB, # VULGAR FRACTION ONE HALF 0x0385: 0x80, # GREEK DIALYTIKA TONOS 0x0386: 0x71, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0387: 0xdd, # GREEK ANO TELEIA + 0x0387: 0xDD, # GREEK ANO TELEIA 0x0388: 0x72, # GREEK CAPITAL LETTER EPSILON WITH TONOS 0x0389: 0x73, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0x75, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0x76, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0x77, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0x78, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xcc, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x038A: 0x75, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0x76, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0x77, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0x78, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0xCC, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0x0391: 0x41, # GREEK CAPITAL LETTER ALPHA 0x0392: 0x42, # GREEK CAPITAL LETTER BETA 0x0393: 0x43, # GREEK CAPITAL LETTER GAMMA @@ -488,59 +488,60 @@ 0x0397: 0x47, # GREEK CAPITAL LETTER ETA 0x0398: 0x48, # GREEK CAPITAL LETTER THETA 0x0399: 0x49, # GREEK CAPITAL LETTER IOTA - 0x039a: 0x51, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0x52, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0x53, # GREEK CAPITAL LETTER MU - 0x039d: 0x54, # GREEK CAPITAL LETTER NU - 0x039e: 0x55, # GREEK CAPITAL LETTER XI - 0x039f: 0x56, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0x57, # GREEK CAPITAL LETTER PI - 0x03a1: 0x58, # GREEK CAPITAL LETTER RHO - 0x03a3: 0x59, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0x62, # GREEK CAPITAL LETTER TAU - 0x03a5: 0x63, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0x64, # GREEK CAPITAL LETTER PHI - 0x03a7: 0x65, # GREEK CAPITAL LETTER CHI - 0x03a8: 0x66, # GREEK CAPITAL LETTER PSI - 0x03a9: 0x67, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0x68, # 
GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0x69, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0xb1, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0xb2, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0xb3, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0xb5, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b0: 0xcd, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03b1: 0x8a, # GREEK SMALL LETTER ALPHA - 0x03b2: 0x8b, # GREEK SMALL LETTER BETA - 0x03b3: 0x8c, # GREEK SMALL LETTER GAMMA - 0x03b4: 0x8d, # GREEK SMALL LETTER DELTA - 0x03b5: 0x8e, # GREEK SMALL LETTER EPSILON - 0x03b6: 0x8f, # GREEK SMALL LETTER ZETA - 0x03b7: 0x9a, # GREEK SMALL LETTER ETA - 0x03b8: 0x9b, # GREEK SMALL LETTER THETA - 0x03b9: 0x9c, # GREEK SMALL LETTER IOTA - 0x03ba: 0x9d, # GREEK SMALL LETTER KAPPA - 0x03bb: 0x9e, # GREEK SMALL LETTER LAMDA - 0x03bc: 0x9f, # GREEK SMALL LETTER MU - 0x03bd: 0xaa, # GREEK SMALL LETTER NU - 0x03be: 0xab, # GREEK SMALL LETTER XI - 0x03bf: 0xac, # GREEK SMALL LETTER OMICRON - 0x03c0: 0xad, # GREEK SMALL LETTER PI - 0x03c1: 0xae, # GREEK SMALL LETTER RHO - 0x03c2: 0xba, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0xaf, # GREEK SMALL LETTER SIGMA - 0x03c4: 0xbb, # GREEK SMALL LETTER TAU - 0x03c5: 0xbc, # GREEK SMALL LETTER UPSILON - 0x03c6: 0xbd, # GREEK SMALL LETTER PHI - 0x03c7: 0xbe, # GREEK SMALL LETTER CHI - 0x03c8: 0xbf, # GREEK SMALL LETTER PSI - 0x03c9: 0xcb, # GREEK SMALL LETTER OMEGA - 0x03ca: 0xb4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0xb8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0xb6, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0xb7, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0xb9, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0xcf, # HORIZONTAL BAR - 0x2018: 0xce, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xde, # RIGHT SINGLE QUOTATION MARK -} \ No newline at end of file + 0x039A: 0x51, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0x52, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0x53, # GREEK 
CAPITAL LETTER MU + 0x039D: 0x54, # GREEK CAPITAL LETTER NU + 0x039E: 0x55, # GREEK CAPITAL LETTER XI + 0x039F: 0x56, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0x57, # GREEK CAPITAL LETTER PI + 0x03A1: 0x58, # GREEK CAPITAL LETTER RHO + 0x03A3: 0x59, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0x62, # GREEK CAPITAL LETTER TAU + 0x03A5: 0x63, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0x64, # GREEK CAPITAL LETTER PHI + 0x03A7: 0x65, # GREEK CAPITAL LETTER CHI + 0x03A8: 0x66, # GREEK CAPITAL LETTER PSI + 0x03A9: 0x67, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0x68, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0x69, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xB1, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03AD: 0xB2, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xB3, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xB5, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xCD, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0x8A, # GREEK SMALL LETTER ALPHA + 0x03B2: 0x8B, # GREEK SMALL LETTER BETA + 0x03B3: 0x8C, # GREEK SMALL LETTER GAMMA + 0x03B4: 0x8D, # GREEK SMALL LETTER DELTA + 0x03B5: 0x8E, # GREEK SMALL LETTER EPSILON + 0x03B6: 0x8F, # GREEK SMALL LETTER ZETA + 0x03B7: 0x9A, # GREEK SMALL LETTER ETA + 0x03B8: 0x9B, # GREEK SMALL LETTER THETA + 0x03B9: 0x9C, # GREEK SMALL LETTER IOTA + 0x03BA: 0x9D, # GREEK SMALL LETTER KAPPA + 0x03BB: 0x9E, # GREEK SMALL LETTER LAMDA + 0x03BC: 0x9F, # GREEK SMALL LETTER MU + 0x03BD: 0xAA, # GREEK SMALL LETTER NU + 0x03BE: 0xAB, # GREEK SMALL LETTER XI + 0x03BF: 0xAC, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xAD, # GREEK SMALL LETTER PI + 0x03C1: 0xAE, # GREEK SMALL LETTER RHO + 0x03C2: 0xBA, # GREEK SMALL LETTER FINAL SIGMA + 0x03C3: 0xAF, # GREEK SMALL LETTER SIGMA + 0x03C4: 0xBB, # GREEK SMALL LETTER TAU + 0x03C5: 0xBC, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xBD, # GREEK SMALL LETTER PHI + 0x03C7: 0xBE, # GREEK SMALL LETTER CHI + 0x03C8: 0xBF, # GREEK SMALL LETTER PSI + 0x03C9: 0xCB, # GREEK SMALL 
LETTER OMEGA + 0x03CA: 0xB4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 0xB8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xB6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xB7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xB9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0xCF, # HORIZONTAL BAR + 0x2018: 0xCE, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xDE, # RIGHT SINGLE QUOTATION MARK +} + Index: iso8859_1.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_1.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_1.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_1.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' 
# 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a 
-> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 
0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\xa1' # 0xa1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xaa' # 0xaa -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xba' # 0xba -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xbf -> INVERTED QUESTION MARK - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> 
LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xd0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xdd -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xde -> LATIN CAPITAL LETTER THORN (Icelandic) - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S (German) - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE 
- u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xf0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xf2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xfd -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xfe -> LATIN SMALL LETTER THORN (Icelandic) - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + 
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 
0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 
0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # 
COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 
0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a1: 0xa1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xaa, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # 
ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00ba: 0xba, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xbf, # INVERTED QUESTION MARK - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xd0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 
0xdd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xde, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S (German) - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0xf0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0xfe, # LATIN SMALL LETTER THORN (Icelandic) - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED 
EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00D1: 
0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN 
+ 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS +} + Index: iso8859_10.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_10.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_10.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_10.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' 
# 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a 
-> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 
0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0104' # 0xa1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0112' # 0xa2 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xa3 -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u012a' # 0xa4 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0128' # 0xa5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u0136' # 0xa6 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xa7' # 0xa7 -> SECTION SIGN - u'\u013b' # 0xa8 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0110' # 0xa9 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0160' # 0xaa -> LATIN CAPITAL LETTER S WITH CARON - u'\u0166' # 0xab -> LATIN CAPITAL LETTER T WITH STROKE - u'\u017d' # 0xac -> LATIN CAPITAL LETTER Z WITH CARON - u'\xad' # 0xad -> SOFT HYPHEN - u'\u016a' # 0xae -> LATIN CAPITAL LETTER U WITH MACRON - u'\u014a' # 0xaf -> LATIN CAPITAL LETTER ENG - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\u0105' # 0xb1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u0113' # 0xb2 -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xb3 -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012b' # 0xb4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0129' # 0xb5 -> LATIN SMALL LETTER I WITH TILDE - u'\u0137' # 0xb6 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\u013c' # 0xb8 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0111' # 0xb9 -> LATIN SMALL LETTER D WITH STROKE - u'\u0161' # 0xba -> LATIN SMALL LETTER S WITH CARON - u'\u0167' # 0xbb -> LATIN SMALL LETTER T WITH STROKE - u'\u017e' # 0xbc -> LATIN SMALL LETTER Z WITH CARON - u'\u2015' # 0xbd -> HORIZONTAL BAR - u'\u016b' # 0xbe -> LATIN SMALL LETTER U WITH MACRON - u'\u014b' # 0xbf -> LATIN SMALL LETTER ENG - u'\u0100' # 0xc0 -> 
LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xc7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xc8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xca -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xcc -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xd0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\u0145' # 0xd1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xd2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u0168' # 0xd7 -> LATIN CAPITAL LETTER U WITH TILDE - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xd9 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xdd -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xde -> LATIN CAPITAL LETTER THORN (Icelandic) - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S (German) - u'\u0101' # 0xe0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - 
u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xe7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xe8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xea -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xec -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xf0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\u0146' # 0xf1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xf2 -> LATIN SMALL LETTER O WITH MACRON - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u0169' # 0xf7 -> LATIN SMALL LETTER U WITH TILDE - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xf9 -> LATIN SMALL LETTER U WITH OGONEK - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xfd -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xfe -> LATIN SMALL LETTER THORN (Icelandic) - u'\u0138' # 0xff -> LATIN SMALL LETTER KRA + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0112' # 0xA2 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u012a' # 0xA4 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u0136' # 0xA6 -> LATIN CAPITAL LETTER K 
WITH CEDILLA + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u013b' # 0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0110' # 0xA9 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0160' # 0xAA -> LATIN CAPITAL LETTER S WITH CARON + u'\u0166' # 0xAB -> LATIN CAPITAL LETTER T WITH STROKE + u'\u017d' # 0xAC -> LATIN CAPITAL LETTER Z WITH CARON + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u016a' # 0xAE -> LATIN CAPITAL LETTER U WITH MACRON + u'\u014a' # 0xAF -> LATIN CAPITAL LETTER ENG + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0113' # 0xB2 -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0xB3 -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE + u'\u0137' # 0xB6 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u013c' # 0xB8 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0111' # 0xB9 -> LATIN SMALL LETTER D WITH STROKE + u'\u0161' # 0xBA -> LATIN SMALL LETTER S WITH CARON + u'\u0167' # 0xBB -> LATIN SMALL LETTER T WITH STROKE + u'\u017e' # 0xBC -> LATIN SMALL LETTER Z WITH CARON + u'\u2015' # 0xBD -> HORIZONTAL BAR + u'\u016b' # 0xBE -> LATIN SMALL LETTER U WITH MACRON + u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG + u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE + 
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u0168' # 0xD7 -> LATIN CAPITAL LETTER U WITH TILDE + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) + 
u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0169' # 0xF7 -> LATIN SMALL LETTER U WITH TILDE + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) + u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 
+335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # 
LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 
0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a7: 0xa7, # SECTION SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xd0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xdd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xde, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S (German) - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN 
SMALL LETTER AE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0xf0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0xfe, # LATIN SMALL LETTER THORN (Icelandic) - 0x0100: 0xc0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xe0, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xa1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xb1, # LATIN SMALL LETTER A WITH OGONEK - 0x010c: 0xc8, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xe8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xa9, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xb9, # LATIN SMALL LETTER D WITH STROKE - 0x0112: 0xa2, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xb2, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xcc, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xec, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xca, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xea, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xa3, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xb3, # LATIN SMALL LETTER G WITH CEDILLA - 0x0128: 0xa5, # LATIN CAPITAL LETTER I WITH TILDE - 0x0129: 0xb5, # LATIN SMALL LETTER I WITH TILDE - 0x012a: 0xa4, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0xb4, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0xc7, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0xe7, # LATIN SMALL LETTER I WITH 
OGONEK - 0x0136: 0xa6, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xb6, # LATIN SMALL LETTER K WITH CEDILLA - 0x0138: 0xff, # LATIN SMALL LETTER KRA - 0x013b: 0xa8, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0xb8, # LATIN SMALL LETTER L WITH CEDILLA - 0x0145: 0xd1, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xf1, # LATIN SMALL LETTER N WITH CEDILLA - 0x014a: 0xaf, # LATIN CAPITAL LETTER ENG - 0x014b: 0xbf, # LATIN SMALL LETTER ENG - 0x014c: 0xd2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0xf2, # LATIN SMALL LETTER O WITH MACRON - 0x0160: 0xaa, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xba, # LATIN SMALL LETTER S WITH CARON - 0x0166: 0xab, # LATIN CAPITAL LETTER T WITH STROKE - 0x0167: 0xbb, # LATIN SMALL LETTER T WITH STROKE - 0x0168: 0xd7, # LATIN CAPITAL LETTER U WITH TILDE - 0x0169: 0xf7, # LATIN SMALL LETTER U WITH TILDE - 0x016a: 0xae, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0xbe, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xd9, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xf9, # LATIN SMALL LETTER U WITH OGONEK - 0x017d: 0xac, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xbc, # LATIN SMALL LETTER Z WITH CARON - 0x2015: 0xbd, # HORIZONTAL BAR -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A7: 0xA7, # SECTION SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN 
CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) + 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 
0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xA9, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xB9, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0xA2, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xB2, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xA3, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xB3, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE + 0x012A: 0xA4, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xA6, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xB6, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 0xFF, # LATIN SMALL LETTER KRA + 0x013B: 0xA8, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xB8, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014A: 0xAF, # LATIN CAPITAL LETTER ENG + 0x014B: 0xBF, # LATIN SMALL LETTER ENG + 0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON + 0x0160: 0xAA, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xBA, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0xAB, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0xBB, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0xD7, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0xF7, # LATIN SMALL LETTER U WITH TILDE + 0x016A: 0xAE, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xBE, # LATIN SMALL LETTER U WITH MACRON + 
0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK + 0x017D: 0xAC, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBC, # LATIN SMALL LETTER Z WITH CARON + 0x2015: 0xBD, # HORIZONTAL BAR +} + Index: iso8859_11.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_11.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_11.py 24 Oct 2005 12:07:48 -0000 1.4 +++ iso8859_11.py 24 Oct 2005 12:14:59 -0000 1.5 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' 
# 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a 
-> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 
0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,104 +186,104 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0e01' # 0xa1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xa2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xa3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xa4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xa5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xa6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xa7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xa8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xa9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xaa -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xab -> THAI CHARACTER SO SO - u'\u0e0c' # 0xac -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xad -> THAI CHARACTER YO YING - u'\u0e0e' # 0xae -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xaf -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xb0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xb1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xb2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xb3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xb4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xb5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xb6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xb7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xb8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xb9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xba -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xbb -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xbc -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xbd -> THAI CHARACTER FO FA - u'\u0e1e' # 0xbe -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xbf -> THAI CHARACTER FO FAN - u'\u0e20' # 0xc0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xc1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xc2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xc3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xc4 
-> THAI CHARACTER RU - u'\u0e25' # 0xc5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xc6 -> THAI CHARACTER LU - u'\u0e27' # 0xc7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xc8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xc9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xca -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xcb -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xcc -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xcd -> THAI CHARACTER O ANG - u'\u0e2e' # 0xce -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xcf -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xd0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xd1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xd2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xd3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xd4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xd5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xd6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xd7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xd8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xd9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xda -> THAI CHARACTER PHINTHU + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO 
PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' - u'\u0e3f' # 0xdf -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xe0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xe1 -> THAI CHARACTER SARA AE - u'\u0e42' # 
0xe2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xe3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xe4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xe5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xe6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xe7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xe8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xe9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xea -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xeb -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xec -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xed -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xee -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xef -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xf0 -> THAI DIGIT ZERO - u'\u0e51' # 0xf1 -> THAI DIGIT ONE - u'\u0e52' # 0xf2 -> THAI DIGIT TWO - u'\u0e53' # 0xf3 -> THAI DIGIT THREE - u'\u0e54' # 0xf4 -> THAI DIGIT FOUR - u'\u0e55' # 0xf5 -> THAI DIGIT FIVE - u'\u0e56' # 0xf6 -> THAI DIGIT SIX - u'\u0e57' # 0xf7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xf8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xf9 -> THAI DIGIT NINE - u'\u0e5a' # 0xfa -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xfb -> THAI CHARACTER KHOMUT + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + 
u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT u'\ufffe' u'\ufffe' u'\ufffe' @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP 
+ 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW 
LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,98 +447,99 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x0e01: 0xa1, # THAI CHARACTER KO KAI - 0x0e02: 0xa2, # THAI 
CHARACTER KHO KHAI - 0x0e03: 0xa3, # THAI CHARACTER KHO KHUAT - 0x0e04: 0xa4, # THAI CHARACTER KHO KHWAI - 0x0e05: 0xa5, # THAI CHARACTER KHO KHON - 0x0e06: 0xa6, # THAI CHARACTER KHO RAKHANG - 0x0e07: 0xa7, # THAI CHARACTER NGO NGU - 0x0e08: 0xa8, # THAI CHARACTER CHO CHAN - 0x0e09: 0xa9, # THAI CHARACTER CHO CHING - 0x0e0a: 0xaa, # THAI CHARACTER CHO CHANG - 0x0e0b: 0xab, # THAI CHARACTER SO SO - 0x0e0c: 0xac, # THAI CHARACTER CHO CHOE - 0x0e0d: 0xad, # THAI CHARACTER YO YING - 0x0e0e: 0xae, # THAI CHARACTER DO CHADA - 0x0e0f: 0xaf, # THAI CHARACTER TO PATAK - 0x0e10: 0xb0, # THAI CHARACTER THO THAN - 0x0e11: 0xb1, # THAI CHARACTER THO NANGMONTHO - 0x0e12: 0xb2, # THAI CHARACTER THO PHUTHAO - 0x0e13: 0xb3, # THAI CHARACTER NO NEN - 0x0e14: 0xb4, # THAI CHARACTER DO DEK - 0x0e15: 0xb5, # THAI CHARACTER TO TAO - 0x0e16: 0xb6, # THAI CHARACTER THO THUNG - 0x0e17: 0xb7, # THAI CHARACTER THO THAHAN - 0x0e18: 0xb8, # THAI CHARACTER THO THONG - 0x0e19: 0xb9, # THAI CHARACTER NO NU - 0x0e1a: 0xba, # THAI CHARACTER BO BAIMAI - 0x0e1b: 0xbb, # THAI CHARACTER PO PLA - 0x0e1c: 0xbc, # THAI CHARACTER PHO PHUNG - 0x0e1d: 0xbd, # THAI CHARACTER FO FA - 0x0e1e: 0xbe, # THAI CHARACTER PHO PHAN - 0x0e1f: 0xbf, # THAI CHARACTER FO FAN - 0x0e20: 0xc0, # THAI CHARACTER PHO SAMPHAO - 0x0e21: 0xc1, # THAI CHARACTER MO MA - 0x0e22: 0xc2, # THAI CHARACTER YO YAK - 0x0e23: 0xc3, # THAI CHARACTER RO RUA - 0x0e24: 0xc4, # THAI CHARACTER RU - 0x0e25: 0xc5, # THAI CHARACTER LO LING - 0x0e26: 0xc6, # THAI CHARACTER LU - 0x0e27: 0xc7, # THAI CHARACTER WO WAEN - 0x0e28: 0xc8, # THAI CHARACTER SO SALA - 0x0e29: 0xc9, # THAI CHARACTER SO RUSI - 0x0e2a: 0xca, # THAI CHARACTER SO SUA - 0x0e2b: 0xcb, # THAI CHARACTER HO HIP - 0x0e2c: 0xcc, # THAI CHARACTER LO CHULA - 0x0e2d: 0xcd, # THAI CHARACTER O ANG - 0x0e2e: 0xce, # THAI CHARACTER HO NOKHUK - 0x0e2f: 0xcf, # THAI CHARACTER PAIYANNOI - 0x0e30: 0xd0, # THAI CHARACTER SARA A - 0x0e31: 0xd1, # THAI CHARACTER MAI HAN-AKAT - 0x0e32: 0xd2, # THAI 
CHARACTER SARA AA - 0x0e33: 0xd3, # THAI CHARACTER SARA AM - 0x0e34: 0xd4, # THAI CHARACTER SARA I - 0x0e35: 0xd5, # THAI CHARACTER SARA II - 0x0e36: 0xd6, # THAI CHARACTER SARA UE - 0x0e37: 0xd7, # THAI CHARACTER SARA UEE - 0x0e38: 0xd8, # THAI CHARACTER SARA U - 0x0e39: 0xd9, # THAI CHARACTER SARA UU - 0x0e3a: 0xda, # THAI CHARACTER PHINTHU - 0x0e3f: 0xdf, # THAI CURRENCY SYMBOL BAHT - 0x0e40: 0xe0, # THAI CHARACTER SARA E - 0x0e41: 0xe1, # THAI CHARACTER SARA AE - 0x0e42: 0xe2, # THAI CHARACTER SARA O - 0x0e43: 0xe3, # THAI CHARACTER SARA AI MAIMUAN - 0x0e44: 0xe4, # THAI CHARACTER SARA AI MAIMALAI - 0x0e45: 0xe5, # THAI CHARACTER LAKKHANGYAO - 0x0e46: 0xe6, # THAI CHARACTER MAIYAMOK - 0x0e47: 0xe7, # THAI CHARACTER MAITAIKHU - 0x0e48: 0xe8, # THAI CHARACTER MAI EK - 0x0e49: 0xe9, # THAI CHARACTER MAI THO - 0x0e4a: 0xea, # THAI CHARACTER MAI TRI - 0x0e4b: 0xeb, # THAI CHARACTER MAI CHATTAWA - 0x0e4c: 0xec, # THAI CHARACTER THANTHAKHAT - 0x0e4d: 0xed, # THAI CHARACTER NIKHAHIT - 0x0e4e: 0xee, # THAI CHARACTER YAMAKKAN - 0x0e4f: 0xef, # THAI CHARACTER FONGMAN - 0x0e50: 0xf0, # THAI DIGIT ZERO - 0x0e51: 0xf1, # THAI DIGIT ONE - 0x0e52: 0xf2, # THAI DIGIT TWO - 0x0e53: 0xf3, # THAI DIGIT THREE - 0x0e54: 0xf4, # THAI DIGIT FOUR - 0x0e55: 0xf5, # THAI DIGIT FIVE - 0x0e56: 0xf6, # THAI DIGIT SIX - 0x0e57: 0xf7, # THAI DIGIT SEVEN - 0x0e58: 0xf8, # THAI DIGIT EIGHT - 0x0e59: 0xf9, # THAI DIGIT NINE - 0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU - 0x0e5b: 0xfb, # THAI CHARACTER KHOMUT -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x0E01: 0xA1, # THAI CHARACTER KO KAI + 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI + 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT + 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI + 0x0E05: 0xA5, # THAI CHARACTER KHO KHON + 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG + 0x0E07: 0xA7, # THAI CHARACTER NGO NGU + 0x0E08: 0xA8, # THAI 
CHARACTER CHO CHAN + 0x0E09: 0xA9, # THAI CHARACTER CHO CHING + 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG + 0x0E0B: 0xAB, # THAI CHARACTER SO SO + 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE + 0x0E0D: 0xAD, # THAI CHARACTER YO YING + 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA + 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK + 0x0E10: 0xB0, # THAI CHARACTER THO THAN + 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO + 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO + 0x0E13: 0xB3, # THAI CHARACTER NO NEN + 0x0E14: 0xB4, # THAI CHARACTER DO DEK + 0x0E15: 0xB5, # THAI CHARACTER TO TAO + 0x0E16: 0xB6, # THAI CHARACTER THO THUNG + 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN + 0x0E18: 0xB8, # THAI CHARACTER THO THONG + 0x0E19: 0xB9, # THAI CHARACTER NO NU + 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI + 0x0E1B: 0xBB, # THAI CHARACTER PO PLA + 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG + 0x0E1D: 0xBD, # THAI CHARACTER FO FA + 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN + 0x0E1F: 0xBF, # THAI CHARACTER FO FAN + 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO + 0x0E21: 0xC1, # THAI CHARACTER MO MA + 0x0E22: 0xC2, # THAI CHARACTER YO YAK + 0x0E23: 0xC3, # THAI CHARACTER RO RUA + 0x0E24: 0xC4, # THAI CHARACTER RU + 0x0E25: 0xC5, # THAI CHARACTER LO LING + 0x0E26: 0xC6, # THAI CHARACTER LU + 0x0E27: 0xC7, # THAI CHARACTER WO WAEN + 0x0E28: 0xC8, # THAI CHARACTER SO SALA + 0x0E29: 0xC9, # THAI CHARACTER SO RUSI + 0x0E2A: 0xCA, # THAI CHARACTER SO SUA + 0x0E2B: 0xCB, # THAI CHARACTER HO HIP + 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA + 0x0E2D: 0xCD, # THAI CHARACTER O ANG + 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK + 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI + 0x0E30: 0xD0, # THAI CHARACTER SARA A + 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT + 0x0E32: 0xD2, # THAI CHARACTER SARA AA + 0x0E33: 0xD3, # THAI CHARACTER SARA AM + 0x0E34: 0xD4, # THAI CHARACTER SARA I + 0x0E35: 0xD5, # THAI CHARACTER SARA II + 0x0E36: 0xD6, # THAI CHARACTER SARA UE + 0x0E37: 0xD7, # THAI CHARACTER SARA UEE + 0x0E38: 0xD8, # THAI CHARACTER SARA 
U + 0x0E39: 0xD9, # THAI CHARACTER SARA UU + 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU + 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT + 0x0E40: 0xE0, # THAI CHARACTER SARA E + 0x0E41: 0xE1, # THAI CHARACTER SARA AE + 0x0E42: 0xE2, # THAI CHARACTER SARA O + 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN + 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI + 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO + 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK + 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU + 0x0E48: 0xE8, # THAI CHARACTER MAI EK + 0x0E49: 0xE9, # THAI CHARACTER MAI THO + 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI + 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA + 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT + 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT + 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN + 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN + 0x0E50: 0xF0, # THAI DIGIT ZERO + 0x0E51: 0xF1, # THAI DIGIT ONE + 0x0E52: 0xF2, # THAI DIGIT TWO + 0x0E53: 0xF3, # THAI DIGIT THREE + 0x0E54: 0xF4, # THAI DIGIT FOUR + 0x0E55: 0xF5, # THAI DIGIT FIVE + 0x0E56: 0xF6, # THAI DIGIT SIX + 0x0E57: 0xF7, # THAI DIGIT SEVEN + 0x0E58: 0xF8, # THAI DIGIT EIGHT + 0x0E59: 0xF9, # THAI DIGIT NINE + 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU + 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT +} + Index: iso8859_13.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_13.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_13.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_13.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE 
RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u201d' # 0xa1 -> RIGHT DOUBLE QUOTATION MARK - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\u201e' # 0xa5 -> DOUBLE LOW-9 QUOTATION MARK - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xd8' # 0xa8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u0156' # 0xaa -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xc6' # 0xaf -> LATIN CAPITAL LETTER AE - 
u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\u201c' # 0xb4 -> LEFT DOUBLE QUOTATION MARK - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xf8' # 0xb8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\u0157' # 0xba -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xbf -> LATIN SMALL LETTER AE - u'\u0104' # 0xc0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xc1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xc2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xc3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\u0118' # 0xc6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xc7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xc8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xca -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xcb -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xcc -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xcd -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xce -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xcf -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xd0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xd1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xd2 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\u014c' # 0xd4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 
0xd7 -> MULTIPLICATION SIGN - u'\u0172' # 0xd8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xd9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xda -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xdb -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xdd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xde -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S (German) - u'\u0105' # 0xe0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xe1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xe2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xe3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xe6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xe7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xe8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u017a' # 0xea -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xeb -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xec -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xed -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xee -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xef -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xf0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xf1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xf2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xf4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\u0173' # 0xf8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xf9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xfa -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xfb -> LATIN SMALL LETTER U WITH MACRON 
- u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xfd -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xfe -> LATIN SMALL LETTER Z WITH CARON - u'\u2019' # 0xff -> RIGHT SINGLE QUOTATION MARK + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0xBF -> LATIN SMALL LETTER AE + u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH 
RING ABOVE + u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL 
LETTER E WITH ACUTE + u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON + u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE 
SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 
0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 
0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xaf, # LATIN CAPITAL LETTER AE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xa8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 
0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S (German) - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xbf, # LATIN SMALL LETTER AE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xb8, # LATIN SMALL LETTER O WITH STROKE - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xc2, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xe2, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xc0, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xe0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xc3, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xe3, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0xc8, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xe8, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0xc7, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xe7, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xcb, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xeb, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xc6, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xe6, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xcc, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xec, # LATIN SMALL LETTER G WITH CEDILLA - 0x012a: 0xce, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0xee, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0xc1, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0xe1, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xcd, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xed, # LATIN SMALL LETTER K WITH CEDILLA - 0x013b: 0xcf, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0xef, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0xd9, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xf9, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xd1, # LATIN CAPITAL LETTER N WITH 
ACUTE - 0x0144: 0xf1, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xd2, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xf2, # LATIN SMALL LETTER N WITH CEDILLA - 0x014c: 0xd4, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0xf4, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xaa, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xba, # LATIN SMALL LETTER R WITH CEDILLA - 0x015a: 0xda, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0xfa, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xd0, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xf0, # LATIN SMALL LETTER S WITH CARON - 0x016a: 0xdb, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0xfb, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xd8, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xf8, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0xca, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0xea, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0xdd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0xfd, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0xde, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xfe, # LATIN SMALL LETTER Z WITH CARON - 0x2019: 0xff, # RIGHT SINGLE QUOTATION MARK - 0x201c: 0xb4, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xa1, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xa5, # DOUBLE LOW-9 QUOTATION MARK -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # 
MIDDLE DOT + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBF, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xE6, # 
LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA + 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA + 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA + 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA + 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON + 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0xFF, # RIGHT SINGLE QUOTATION MARK + 0x201C: 0xB4, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xA1, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK +} + 
Index: iso8859_14.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_14.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_14.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_14.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u1e02' # 0xa1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE - 
u'\u1e03' # 0xa2 -> LATIN SMALL LETTER B WITH DOT ABOVE - u'\xa3' # 0xa3 -> POUND SIGN - u'\u010a' # 0xa4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u010b' # 0xa5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u1e0a' # 0xa6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE - u'\xa7' # 0xa7 -> SECTION SIGN - u'\u1e80' # 0xa8 -> LATIN CAPITAL LETTER W WITH GRAVE - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u1e82' # 0xaa -> LATIN CAPITAL LETTER W WITH ACUTE - u'\u1e0b' # 0xab -> LATIN SMALL LETTER D WITH DOT ABOVE - u'\u1ef2' # 0xac -> LATIN CAPITAL LETTER Y WITH GRAVE - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\u0178' # 0xaf -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u1e1e' # 0xb0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE - u'\u1e1f' # 0xb1 -> LATIN SMALL LETTER F WITH DOT ABOVE - u'\u0120' # 0xb2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE - u'\u0121' # 0xb3 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\u1e40' # 0xb4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE - u'\u1e41' # 0xb5 -> LATIN SMALL LETTER M WITH DOT ABOVE - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\u1e56' # 0xb7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE - u'\u1e81' # 0xb8 -> LATIN SMALL LETTER W WITH GRAVE - u'\u1e57' # 0xb9 -> LATIN SMALL LETTER P WITH DOT ABOVE - u'\u1e83' # 0xba -> LATIN SMALL LETTER W WITH ACUTE - u'\u1e60' # 0xbb -> LATIN CAPITAL LETTER S WITH DOT ABOVE - u'\u1ef3' # 0xbc -> LATIN SMALL LETTER Y WITH GRAVE - u'\u1e84' # 0xbd -> LATIN CAPITAL LETTER W WITH DIAERESIS - u'\u1e85' # 0xbe -> LATIN SMALL LETTER W WITH DIAERESIS - u'\u1e61' # 0xbf -> LATIN SMALL LETTER S WITH DOT ABOVE - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN 
CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0174' # 0xd0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u1e6a' # 0xd7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xdd -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0176' # 0xde -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E 
WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0175' # 0xf0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xf2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u1e6b' # 0xf7 -> LATIN SMALL LETTER T WITH DOT ABOVE - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xfd -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0177' # 0xfe -> LATIN SMALL LETTER Y WITH CIRCUMFLEX - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u1e02' # 0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE + u'\u1e03' # 0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE + u'\xa3' # 0xA3 -> POUND SIGN + u'\u010a' # 0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u010b' # 0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u1e0a' # 0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u1e80' # 0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u1e82' # 0xAA -> LATIN CAPITAL LETTER W WITH ACUTE + u'\u1e0b' # 0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE + u'\u1ef2' # 0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u0178' # 0xAF -> LATIN CAPITAL LETTER Y 
WITH DIAERESIS + u'\u1e1e' # 0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE + u'\u1e1f' # 0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE + u'\u0120' # 0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\u0121' # 0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\u1e40' # 0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE + u'\u1e41' # 0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\u1e56' # 0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE + u'\u1e81' # 0xB8 -> LATIN SMALL LETTER W WITH GRAVE + u'\u1e57' # 0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE + u'\u1e83' # 0xBA -> LATIN SMALL LETTER W WITH ACUTE + u'\u1e60' # 0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE + u'\u1ef3' # 0xBC -> LATIN SMALL LETTER Y WITH GRAVE + u'\u1e84' # 0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS + u'\u1e85' # 0xBE -> LATIN SMALL LETTER W WITH DIAERESIS + u'\u1e61' # 0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0174' # 0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> 
LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u1e6a' # 0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0176' # 0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0175' # 0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u1e6b' # 0xF7 -> LATIN SMALL LETTER T WITH DOT 
ABOVE + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0177' # 0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # 
HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # 
CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a3: 0xa3, # 
POUND SIGN - 0x00a7: 0xa7, # SECTION SIGN - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xdd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN 
SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x010a: 0xa4, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x010b: 0xa5, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x0120: 0xb2, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x0121: 0xb3, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x0174: 0xd0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX - 0x0175: 0xf0, # LATIN SMALL LETTER W WITH CIRCUMFLEX - 0x0176: 0xde, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - 0x0177: 0xfe, # LATIN SMALL LETTER Y WITH CIRCUMFLEX - 0x0178: 0xaf, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x1e02: 0xa1, # LATIN CAPITAL LETTER B WITH DOT ABOVE - 0x1e03: 0xa2, # LATIN SMALL LETTER B WITH DOT ABOVE - 0x1e0a: 0xa6, # LATIN CAPITAL LETTER D WITH DOT ABOVE - 0x1e0b: 0xab, # LATIN SMALL LETTER D WITH DOT ABOVE - 0x1e1e: 0xb0, # LATIN CAPITAL LETTER F WITH DOT ABOVE - 0x1e1f: 
0xb1, # LATIN SMALL LETTER F WITH DOT ABOVE - 0x1e40: 0xb4, # LATIN CAPITAL LETTER M WITH DOT ABOVE - 0x1e41: 0xb5, # LATIN SMALL LETTER M WITH DOT ABOVE - 0x1e56: 0xb7, # LATIN CAPITAL LETTER P WITH DOT ABOVE - 0x1e57: 0xb9, # LATIN SMALL LETTER P WITH DOT ABOVE - 0x1e60: 0xbb, # LATIN CAPITAL LETTER S WITH DOT ABOVE - 0x1e61: 0xbf, # LATIN SMALL LETTER S WITH DOT ABOVE - 0x1e6a: 0xd7, # LATIN CAPITAL LETTER T WITH DOT ABOVE - 0x1e6b: 0xf7, # LATIN SMALL LETTER T WITH DOT ABOVE - 0x1e80: 0xa8, # LATIN CAPITAL LETTER W WITH GRAVE - 0x1e81: 0xb8, # LATIN SMALL LETTER W WITH GRAVE - 0x1e82: 0xaa, # LATIN CAPITAL LETTER W WITH ACUTE - 0x1e83: 0xba, # LATIN SMALL LETTER W WITH ACUTE - 0x1e84: 0xbd, # LATIN CAPITAL LETTER W WITH DIAERESIS - 0x1e85: 0xbe, # LATIN SMALL LETTER W WITH DIAERESIS - 0x1ef2: 0xac, # LATIN CAPITAL LETTER Y WITH GRAVE - 0x1ef3: 0xbc, # LATIN SMALL LETTER Y WITH GRAVE -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH 
ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x010A: 0xA4, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010B: 0xA5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x0120: 0xB2, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0xB3, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0174: 0xD0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x0175: 0xF0, # LATIN SMALL LETTER W WITH CIRCUMFLEX + 0x0176: 0xDE, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x0177: 0xFE, # LATIN SMALL LETTER Y WITH CIRCUMFLEX + 0x0178: 0xAF, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x1E02: 0xA1, # LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x1E03: 0xA2, # LATIN SMALL LETTER B WITH DOT ABOVE + 0x1E0A: 0xA6, # LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x1E0B: 0xAB, # LATIN SMALL LETTER D WITH DOT ABOVE + 0x1E1E: 0xB0, # LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x1E1F: 0xB1, # LATIN SMALL LETTER F WITH DOT ABOVE + 0x1E40: 0xB4, # LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x1E41: 0xB5, # LATIN SMALL LETTER M WITH DOT ABOVE + 0x1E56: 0xB7, # LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x1E57: 0xB9, # LATIN SMALL LETTER P WITH DOT ABOVE + 0x1E60: 0xBB, # LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x1E61: 0xBF, # LATIN SMALL LETTER S WITH DOT ABOVE + 0x1E6A: 0xD7, # LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x1E6B: 0xF7, # LATIN SMALL LETTER T WITH DOT ABOVE + 0x1E80: 0xA8, # LATIN CAPITAL LETTER W WITH GRAVE + 0x1E81: 0xB8, # LATIN SMALL LETTER W WITH GRAVE + 0x1E82: 0xAA, # LATIN CAPITAL LETTER W WITH ACUTE + 0x1E83: 0xBA, # LATIN SMALL LETTER W WITH ACUTE + 0x1E84: 0xBD, # LATIN CAPITAL LETTER W WITH DIAERESIS + 0x1E85: 0xBE, # LATIN SMALL LETTER W WITH DIAERESIS + 0x1EF2: 0xAC, # LATIN CAPITAL LETTER Y WITH GRAVE + 0x1EF3: 0xBC, # LATIN SMALL LETTER Y WITH GRAVE +} 
+ Index: iso8859_15.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_15.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_15.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_15.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\xa1' # 0xa1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xa2 -> 
CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\u20ac' # 0xa4 -> EURO SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\u0160' # 0xa6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xa7 -> SECTION SIGN - u'\u0161' # 0xa8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xaa' # 0xaa -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\u017d' # 0xb4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\u017e' # 0xb8 -> LATIN SMALL LETTER Z WITH CARON - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xba' # 0xba -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xbc -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xbd -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xbe -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xbf' # 0xbf -> INVERTED QUESTION MARK - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> 
LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xd0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xdd -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xde -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xf0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xf2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - 
u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xfd -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xfe -> LATIN SMALL LETTER THORN - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20ac' # 0xA4 -> EURO SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE + u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH 
DIAERESIS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE 
+ u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 
0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL 
LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN 
SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a1: 0xa1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a7: 0xa7, # SECTION SIGN - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xaa, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00ba: 0xba, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bf: 0xbf, # INVERTED QUESTION MARK - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # 
LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xd0, # LATIN CAPITAL LETTER ETH - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xdd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xde, # LATIN CAPITAL LETTER THORN - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 
0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0xf0, # LATIN SMALL LETTER ETH - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0xfe, # LATIN SMALL LETTER THORN - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0152: 0xbc, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xbd, # LATIN SMALL LIGATURE OE - 0x0160: 0xa6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xa8, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0xbe, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x017d: 0xb4, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xb8, # LATIN SMALL LETTER Z WITH CARON - 0x20ac: 0xa4, # EURO SIGN -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT 
HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS 
+ 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xF0, # LATIN SMALL LETTER ETH + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xFE, # LATIN SMALL LETTER THORN + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xBD, # LATIN SMALL LIGATURE OE + 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON + 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS 
+ 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON + 0x20AC: 0xA4, # EURO SIGN +} + Index: iso8859_16.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_16.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- iso8859_16.py 24 Oct 2005 12:07:48 -0000 1.4 +++ iso8859_16.py 24 Oct 2005 12:14:59 -0000 1.5 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - 
u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0104' # 0xa1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0105' # 0xa2 -> LATIN SMALL LETTER A WITH OGONEK - u'\u0141' # 0xa3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u20ac' # 0xa4 -> EURO SIGN - u'\u201e' # 0xa5 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0160' # 0xa6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xa7 -> SECTION SIGN - u'\u0161' # 0xa8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u0218' # 0xaa -> LATIN CAPITAL LETTER S WITH COMMA BELOW - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0179' # 0xac -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xad -> SOFT HYPHEN - u'\u017a' # 0xae -> LATIN SMALL LETTER Z WITH ACUTE - u'\u017b' # 0xaf -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u010c' # 0xb2 -> LATIN CAPITAL LETTER C WITH CARON - u'\u0142' # 0xb3 -> LATIN SMALL LETTER L WITH STROKE - u'\u017d' # 0xb4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\u201d' # 0xb5 -> RIGHT DOUBLE QUOTATION MARK - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\u017e' # 0xb8 -> LATIN SMALL LETTER Z WITH CARON - u'\u010d' # 0xb9 -> LATIN SMALL LETTER C WITH CARON - u'\u0219' # 0xba -> LATIN SMALL LETTER S WITH COMMA BELOW - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xbc -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xbd -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xbe -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u017c' # 0xbf -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xc3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0106' # 0xc5 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc6' # 0xc6 -> 
LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xd0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xd1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xd5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015a' # 0xd7 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u0170' # 0xd8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0118' # 0xdd -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u021a' # 0xde -> LATIN CAPITAL LETTER T WITH COMMA BELOW - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xe3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0107' # 0xe5 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL 
LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xf0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xf1 -> LATIN SMALL LETTER N WITH ACUTE - u'\xf2' # 0xf2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xf5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u015b' # 0xf7 -> LATIN SMALL LETTER S WITH ACUTE - u'\u0171' # 0xf8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0119' # 0xfd -> LATIN SMALL LETTER E WITH OGONEK - u'\u021b' # 0xfe -> LATIN SMALL LETTER T WITH COMMA BELOW - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u20ac' # 0xA4 -> EURO SIGN + u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017a' # 
0xAE -> LATIN SMALL LETTER Z WITH ACUTE + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON + u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON + u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE + u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE 
+ u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE + u'\u0171' # 
0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK + u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # 
HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # 
CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a7: 0xa7, # 
SECTION SIGN - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ad: 0xad, # SOFT HYPHEN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH 
CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xc3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xe3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xa1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xa2, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xc5, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xe5, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0xb2, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xb9, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xd0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xf0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xdd, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xfd, # LATIN SMALL LETTER E WITH OGONEK - 0x0141: 0xa3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xb3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xd1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xf1, # LATIN SMALL LETTER N WITH ACUTE - 0x0150: 0xd5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xf5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0152: 0xbc, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xbd, # LATIN SMALL LIGATURE OE - 0x015a: 0xd7, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0xf7, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xa6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xa8, # LATIN SMALL LETTER S WITH CARON - 0x0170: 0xd8, # LATIN 
CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xf8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0178: 0xbe, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0179: 0xac, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0xae, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0xaf, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0xbf, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0xb4, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xb8, # LATIN SMALL LETTER Z WITH CARON - 0x0218: 0xaa, # LATIN CAPITAL LETTER S WITH COMMA BELOW - 0x0219: 0xba, # LATIN SMALL LETTER S WITH COMMA BELOW - 0x021a: 0xde, # LATIN CAPITAL LETTER T WITH COMMA BELOW - 0x021b: 0xfe, # LATIN SMALL LETTER T WITH COMMA BELOW - 0x201d: 0xb5, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xa5, # DOUBLE LOW-9 QUOTATION MARK - 0x20ac: 0xa4, # EURO SIGN -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A7: 0xA7, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I 
WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xA2, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC5, # LATIN CAPITAL LETTER C WITH ACUTE 
+ 0x0107: 0xE5, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xB2, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xB9, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0xDD, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xFD, # LATIN SMALL LETTER E WITH OGONEK + 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xBD, # LATIN SMALL LIGATURE OE + 0x015A: 0xD7, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xF7, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON + 0x0170: 0xD8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xF8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xAE, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON + 0x0218: 0xAA, # LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x0219: 0xBA, # LATIN SMALL LETTER S WITH COMMA BELOW + 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x021B: 0xFE, # LATIN SMALL LETTER T WITH COMMA BELOW + 0x201D: 0xB5, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK + 0x20AC: 0xA4, # EURO SIGN +} + Index: iso8859_2.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_2.py,v retrieving revision 1.6 
retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_2.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_2.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0104' # 0xa1 -> LATIN CAPITAL LETTER A WITH OGONEK - 
u'\u02d8' # 0xa2 -> BREVE - u'\u0141' # 0xa3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\u013d' # 0xa5 -> LATIN CAPITAL LETTER L WITH CARON - u'\u015a' # 0xa6 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\u0160' # 0xa9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u015e' # 0xaa -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u0164' # 0xab -> LATIN CAPITAL LETTER T WITH CARON - u'\u0179' # 0xac -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xad -> SOFT HYPHEN - u'\u017d' # 0xae -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017b' # 0xaf -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\u0105' # 0xb1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xb2 -> OGONEK - u'\u0142' # 0xb3 -> LATIN SMALL LETTER L WITH STROKE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\u013e' # 0xb5 -> LATIN SMALL LETTER L WITH CARON - u'\u015b' # 0xb6 -> LATIN SMALL LETTER S WITH ACUTE - u'\u02c7' # 0xb7 -> CARON - u'\xb8' # 0xb8 -> CEDILLA - u'\u0161' # 0xb9 -> LATIN SMALL LETTER S WITH CARON - u'\u015f' # 0xba -> LATIN SMALL LETTER S WITH CEDILLA - u'\u0165' # 0xbb -> LATIN SMALL LETTER T WITH CARON - u'\u017a' # 0xbc -> LATIN SMALL LETTER Z WITH ACUTE - u'\u02dd' # 0xbd -> DOUBLE ACUTE ACCENT - u'\u017e' # 0xbe -> LATIN SMALL LETTER Z WITH CARON - u'\u017c' # 0xbf -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xc0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xc3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xc5 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xc6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xc8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xca -> LATIN 
CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xcc -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xcf -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xd0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xd1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xd2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xd5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\u0158' # 0xd8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xd9 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xdb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xdd -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xde -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xe0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xe3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xe5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xe6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xe8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xea -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xec -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL 
LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xef -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xf0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xf1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0xf2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xf5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\u0159' # 0xf8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xf9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xfb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xfd -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0163' # 0xfe -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xff -> DOT ABOVE + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u02d8' # 0xA2 -> BREVE + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u013d' # 0xA5 -> LATIN CAPITAL LETTER L WITH CARON + u'\u015a' # 0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u0164' # 0xAB -> LATIN CAPITAL LETTER T WITH CARON + u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0xB2 -> OGONEK + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0xB4 -> 
ACUTE ACCENT + u'\u013e' # 0xB5 -> LATIN SMALL LETTER L WITH CARON + u'\u015b' # 0xB6 -> LATIN SMALL LETTER S WITH ACUTE + u'\u02c7' # 0xB7 -> CARON + u'\xb8' # 0xB8 -> CEDILLA + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\u0165' # 0xBB -> LATIN SMALL LETTER T WITH CARON + u'\u017a' # 0xBC -> LATIN SMALL LETTER Z WITH ACUTE + u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH 
RING ABOVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0xFE -> LATIN 
SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # 
GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN 
SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00ad: 0xad, # SOFT HYPHEN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b8: 0xb8, # CEDILLA - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0xc7, # LATIN CAPITAL 
LETTER C WITH CEDILLA - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xdd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xfd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0xc3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xe3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xa1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xb1, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xc6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xe6, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0xc8, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xe8, # LATIN SMALL LETTER C WITH CARON - 0x010e: 0xcf, # LATIN CAPITAL LETTER D WITH CARON - 0x010f: 0xef, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0xd0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xf0, # 
LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xca, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xea, # LATIN SMALL LETTER E WITH OGONEK - 0x011a: 0xcc, # LATIN CAPITAL LETTER E WITH CARON - 0x011b: 0xec, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0xc5, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013a: 0xe5, # LATIN SMALL LETTER L WITH ACUTE - 0x013d: 0xa5, # LATIN CAPITAL LETTER L WITH CARON - 0x013e: 0xb5, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xa3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xb3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xd1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xf1, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0xd2, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xf2, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0xd5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xf5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xc0, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xe0, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0xd8, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xf8, # LATIN SMALL LETTER R WITH CARON - 0x015a: 0xa6, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0xb6, # LATIN SMALL LETTER S WITH ACUTE - 0x015e: 0xaa, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0xba, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0xa9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xb9, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0xde, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0xfe, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0xab, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0xbb, # LATIN SMALL LETTER T WITH CARON - 0x016e: 0xd9, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016f: 0xf9, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xdb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xfb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0xac, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0xbc, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0xaf, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0xbf, # LATIN 
SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0xae, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xbe, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0xb7, # CARON - 0x02d8: 0xa2, # BREVE - 0x02d9: 0xff, # DOT ABOVE - 0x02db: 0xb2, # OGONEK - 0x02dd: 0xbd, # DOUBLE ACUTE ACCENT -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B8: 0xB8, # CEDILLA + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL 
LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON + 0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON + 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE + 0x013D: 0xA5, # LATIN CAPITAL LETTER L WITH CARON + 0x013E: 0xB5, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON + 0x015A: 0xA6, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xB6, # LATIN 
SMALL LETTER S WITH ACUTE + 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0xAB, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0xBB, # LATIN SMALL LETTER T WITH CARON + 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0xBC, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xB7, # CARON + 0x02D8: 0xA2, # BREVE + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0xB2, # OGONEK + 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT +} + Index: iso8859_3.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_3.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_3.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_3.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> 
DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0126' # 0xa1 -> LATIN CAPITAL LETTER H WITH STROKE - u'\u02d8' # 0xa2 -> BREVE - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0126' # 0xA1 -> LATIN CAPITAL LETTER H WITH STROKE + u'\u02d8' # 0xA2 -> BREVE + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN u'\ufffe' - u'\u0124' # 0xa6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\u0130' # 0xa9 -> 
LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xaa -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u011e' # 0xab -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0134' # 0xac -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX - u'\xad' # 0xad -> SOFT HYPHEN + u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX + u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' - u'\u017b' # 0xaf -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\u0127' # 0xb1 -> LATIN SMALL LETTER H WITH STROKE - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\u0125' # 0xb6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\u0131' # 0xb9 -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xba -> LATIN SMALL LETTER S WITH CEDILLA - u'\u011f' # 0xbb -> LATIN SMALL LETTER G WITH BREVE - u'\u0135' # 0xbc -> LATIN SMALL LETTER J WITH CIRCUMFLEX - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u0125' # 0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE + u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\ufffe' - 
u'\u017c' # 0xbf -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\ufffe' - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u010a' # 0xc5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u0108' # 0xc6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\ufffe' - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE 
- u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0120' # 0xd5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\u011c' # 0xd8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u016c' # 0xdd -> LATIN CAPITAL LETTER U WITH BREVE - u'\u015c' # 0xde -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u016c' # 0xDD -> LATIN CAPITAL LETTER U WITH BREVE + u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\ufffe' - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010b' # 0xe5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u0109' # 0xe6 -> LATIN SMALL LETTER C WITH 
CIRCUMFLEX - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\ufffe' - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xf2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0121' # 0xf5 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\u011d' # 0xf8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u016d' # 0xfd -> LATIN SMALL LETTER U WITH BREVE - u'\u015d' # 0xfe -> LATIN SMALL LETTER S WITH CIRCUMFLEX - u'\u02d9' # 0xff -> DOT ABOVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH 
TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE + u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 
0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 
0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 
0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,99 +447,100 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00ad: 0xad, # SOFT HYPHEN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A 
WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0108: 0xc6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX - 0x0109: 0xe6, # LATIN SMALL LETTER C WITH CIRCUMFLEX - 0x010a: 0xc5, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x010b: 0xe5, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x011c: 0xd8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX - 0x011d: 0xf8, # LATIN SMALL LETTER G WITH CIRCUMFLEX - 0x011e: 0xab, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0xbb, # LATIN SMALL LETTER G WITH BREVE - 0x0120: 0xd5, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x0121: 0xf5, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x0124: 0xa6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX - 0x0125: 0xb6, # LATIN SMALL LETTER H WITH CIRCUMFLEX - 0x0126: 0xa1, # LATIN CAPITAL LETTER H WITH STROKE - 0x0127: 0xb1, # LATIN SMALL LETTER H WITH STROKE - 0x0130: 0xa9, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xb9, # LATIN SMALL LETTER 
DOTLESS I - 0x0134: 0xac, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX - 0x0135: 0xbc, # LATIN SMALL LETTER J WITH CIRCUMFLEX - 0x015c: 0xde, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX - 0x015d: 0xfe, # LATIN SMALL LETTER S WITH CIRCUMFLEX - 0x015e: 0xaa, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0xba, # LATIN SMALL LETTER S WITH CEDILLA - 0x016c: 0xdd, # LATIN CAPITAL LETTER U WITH BREVE - 0x016d: 0xfd, # LATIN SMALL LETTER U WITH BREVE - 0x017b: 0xaf, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0xbf, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x02d8: 0xa2, # BREVE - 0x02d9: 0xff, # DOT ABOVE -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0108: 0xC6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x0109: 0xE6, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x010A: 0xC5, # LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010B: 0xE5, # LATIN SMALL LETTER C WITH DOT ABOVE + 0x011C: 0xD8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x011D: 0xF8, # LATIN SMALL LETTER G WITH CIRCUMFLEX + 0x011E: 0xAB, # LATIN 
CAPITAL LETTER G WITH BREVE + 0x011F: 0xBB, # LATIN SMALL LETTER G WITH BREVE + 0x0120: 0xD5, # LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0121: 0xF5, # LATIN SMALL LETTER G WITH DOT ABOVE + 0x0124: 0xA6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x0125: 0xB6, # LATIN SMALL LETTER H WITH CIRCUMFLEX + 0x0126: 0xA1, # LATIN CAPITAL LETTER H WITH STROKE + 0x0127: 0xB1, # LATIN SMALL LETTER H WITH STROKE + 0x0130: 0xA9, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xB9, # LATIN SMALL LETTER DOTLESS I + 0x0134: 0xAC, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x0135: 0xBC, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x015C: 0xDE, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x015D: 0xFE, # LATIN SMALL LETTER S WITH CIRCUMFLEX + 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA + 0x016C: 0xDD, # LATIN CAPITAL LETTER U WITH BREVE + 0x016D: 0xFD, # LATIN SMALL LETTER U WITH BREVE + 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x02D8: 0xA2, # BREVE + 0x02D9: 0xFF, # DOT ABOVE +} + Index: iso8859_4.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_4.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_4.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_4.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE 
u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0104' # 0xa1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0138' # 0xa2 -> LATIN SMALL LETTER KRA - u'\u0156' # 0xa3 -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\u0128' # 0xa5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u013b' # 0xa6 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\u0160' # 0xa9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0112' # 0xaa -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xab -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0166' # 0xac -> LATIN CAPITAL LETTER T WITH STROKE - u'\xad' # 0xad 
-> SOFT HYPHEN - u'\u017d' # 0xae -> LATIN CAPITAL LETTER Z WITH CARON - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\u0105' # 0xb1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xb2 -> OGONEK - u'\u0157' # 0xb3 -> LATIN SMALL LETTER R WITH CEDILLA - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\u0129' # 0xb5 -> LATIN SMALL LETTER I WITH TILDE - u'\u013c' # 0xb6 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u02c7' # 0xb7 -> CARON - u'\xb8' # 0xb8 -> CEDILLA - u'\u0161' # 0xb9 -> LATIN SMALL LETTER S WITH CARON - u'\u0113' # 0xba -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xbb -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0167' # 0xbc -> LATIN SMALL LETTER T WITH STROKE - u'\u014a' # 0xbd -> LATIN CAPITAL LETTER ENG - u'\u017e' # 0xbe -> LATIN SMALL LETTER Z WITH CARON - u'\u014b' # 0xbf -> LATIN SMALL LETTER ENG - u'\u0100' # 0xc0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xc7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xc8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xca -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xcc -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xcd -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u012a' # 0xcf -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0110' # 0xd0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0145' # 0xd1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xd2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\u0136' # 0xd3 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xd4' # 0xd4 -> LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xd9 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0168' # 0xdd -> LATIN CAPITAL LETTER U WITH TILDE - u'\u016a' # 0xde -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\u0101' # 0xe0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xe7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xe8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xea -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xec -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u012b' # 0xef -> LATIN SMALL LETTER I WITH MACRON - u'\u0111' # 0xf0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0146' # 0xf1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xf2 -> LATIN SMALL LETTER O WITH MACRON - u'\u0137' # 0xf3 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xf9 -> LATIN SMALL LETTER 
U WITH OGONEK - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0169' # 0xfd -> LATIN SMALL LETTER U WITH TILDE - u'\u016b' # 0xfe -> LATIN SMALL LETTER U WITH MACRON - u'\u02d9' # 0xff -> DOT ABOVE + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0138' # 0xA2 -> LATIN SMALL LETTER KRA + u'\u0156' # 0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u013b' # 0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0112' # 0xAA -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0166' # 0xAC -> LATIN CAPITAL LETTER T WITH STROKE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0xB2 -> OGONEK + u'\u0157' # 0xB3 -> LATIN SMALL LETTER R WITH CEDILLA + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE + u'\u013c' # 0xB6 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u02c7' # 0xB7 -> CARON + u'\xb8' # 0xB8 -> CEDILLA + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u0113' # 0xBA -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0xBB -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0167' # 0xBC -> LATIN SMALL LETTER T WITH STROKE + u'\u014a' # 0xBD -> LATIN CAPITAL LETTER ENG + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG + u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0xC1 
-> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u012a' # 0xCF -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0136' # 0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0168' # 0xDD -> LATIN CAPITAL LETTER U WITH TILDE + u'\u016a' # 0xDE -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> 
LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u012b' # 0xEF -> LATIN SMALL LETTER I WITH MACRON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON + u'\u0137' # 0xF3 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0169' # 0xFD -> LATIN SMALL LETTER U WITH TILDE + u'\u016b' # 0xFE -> LATIN SMALL LETTER U WITH MACRON + u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL 
TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 
0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 
0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00ad: 0xad, # SOFT HYPHEN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b8: 0xb8, # CEDILLA - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00da: 0xda, # LATIN CAPITAL 
LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xe6, # LATIN SMALL LETTER AE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xc0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xe0, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xa1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xb1, # LATIN SMALL LETTER A WITH OGONEK - 0x010c: 0xc8, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xe8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xd0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xf0, # LATIN SMALL LETTER D WITH STROKE - 0x0112: 0xaa, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xba, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xcc, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xec, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xca, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xea, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xab, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xbb, # LATIN SMALL LETTER G WITH CEDILLA - 0x0128: 0xa5, # LATIN CAPITAL LETTER I WITH TILDE - 0x0129: 0xb5, # 
LATIN SMALL LETTER I WITH TILDE - 0x012a: 0xcf, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0xef, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0xc7, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0xe7, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xd3, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xf3, # LATIN SMALL LETTER K WITH CEDILLA - 0x0138: 0xa2, # LATIN SMALL LETTER KRA - 0x013b: 0xa6, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0xb6, # LATIN SMALL LETTER L WITH CEDILLA - 0x0145: 0xd1, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xf1, # LATIN SMALL LETTER N WITH CEDILLA - 0x014a: 0xbd, # LATIN CAPITAL LETTER ENG - 0x014b: 0xbf, # LATIN SMALL LETTER ENG - 0x014c: 0xd2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0xf2, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xa3, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xb3, # LATIN SMALL LETTER R WITH CEDILLA - 0x0160: 0xa9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xb9, # LATIN SMALL LETTER S WITH CARON - 0x0166: 0xac, # LATIN CAPITAL LETTER T WITH STROKE - 0x0167: 0xbc, # LATIN SMALL LETTER T WITH STROKE - 0x0168: 0xdd, # LATIN CAPITAL LETTER U WITH TILDE - 0x0169: 0xfd, # LATIN SMALL LETTER U WITH TILDE - 0x016a: 0xde, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0xfe, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xd9, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xf9, # LATIN SMALL LETTER U WITH OGONEK - 0x017d: 0xae, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xbe, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0xb7, # CARON - 0x02d9: 0xff, # DOT ABOVE - 0x02db: 0xb2, # OGONEK -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B8: 0xB8, # CEDILLA + 
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0xE0, # LATIN SMALL 
LETTER A WITH MACRON + 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0112: 0xAA, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0xBA, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0xAB, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xBB, # LATIN SMALL LETTER G WITH CEDILLA + 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE + 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE + 0x012A: 0xCF, # LATIN CAPITAL LETTER I WITH MACRON + 0x012B: 0xEF, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xD3, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xF3, # LATIN SMALL LETTER K WITH CEDILLA + 0x0138: 0xA2, # LATIN SMALL LETTER KRA + 0x013B: 0xA6, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xB6, # LATIN SMALL LETTER L WITH CEDILLA + 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA + 0x014A: 0xBD, # LATIN CAPITAL LETTER ENG + 0x014B: 0xBF, # LATIN SMALL LETTER ENG + 0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0xA3, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xB3, # LATIN SMALL LETTER R WITH CEDILLA + 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON + 0x0166: 0xAC, # LATIN CAPITAL LETTER T WITH STROKE + 0x0167: 0xBC, # LATIN SMALL LETTER T WITH STROKE + 0x0168: 0xDD, # LATIN CAPITAL LETTER U WITH TILDE + 0x0169: 0xFD, # LATIN 
SMALL LETTER U WITH TILDE + 0x016A: 0xDE, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xFE, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK + 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xB7, # CARON + 0x02D9: 0xFF, # DOT ABOVE + 0x02DB: 0xB2, # OGONEK +} + Index: iso8859_5.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_5.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_5.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_5.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' 
# 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a 
-> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 
0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u0401' # 0xa1 -> CYRILLIC CAPITAL LETTER IO - u'\u0402' # 0xa2 -> CYRILLIC CAPITAL LETTER DJE - u'\u0403' # 0xa3 -> CYRILLIC CAPITAL LETTER GJE - u'\u0404' # 0xa4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0405' # 0xa5 -> CYRILLIC CAPITAL LETTER DZE - u'\u0406' # 0xa6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xa7 -> CYRILLIC CAPITAL LETTER YI - u'\u0408' # 0xa8 -> CYRILLIC CAPITAL LETTER JE - u'\u0409' # 0xa9 -> CYRILLIC CAPITAL LETTER LJE - u'\u040a' # 0xaa -> CYRILLIC CAPITAL LETTER NJE - u'\u040b' # 0xab -> CYRILLIC CAPITAL LETTER TSHE - u'\u040c' # 0xac -> CYRILLIC CAPITAL LETTER KJE - u'\xad' # 0xad -> SOFT HYPHEN - u'\u040e' # 0xae -> CYRILLIC CAPITAL LETTER SHORT U - u'\u040f' # 0xaf -> CYRILLIC CAPITAL LETTER DZHE - u'\u0410' # 0xb0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xb1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xb2 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xb3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xb4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xb5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xb6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0xb7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xb8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xb9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xba -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xbb -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xbc -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xbd -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xbe -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xbf -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xc0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xc1 -> 
CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xc2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xc3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xc4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xc5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xc6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xc7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xc8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xc9 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xca -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xcb -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xcc -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xcd -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xce -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xcf -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xd0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xd1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xd2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xd3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xd4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xd5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xd6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xd7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xd8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xd9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xda -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xdb -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xdc -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xdd -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xde -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xdf -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xe0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xe1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xe2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xe3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xe4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xe5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xe6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xe7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xe8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xe9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xea -> CYRILLIC 
SMALL LETTER HARD SIGN - u'\u044b' # 0xeb -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xec -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xed -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xee -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xef -> CYRILLIC SMALL LETTER YA - u'\u2116' # 0xf0 -> NUMERO SIGN - u'\u0451' # 0xf1 -> CYRILLIC SMALL LETTER IO - u'\u0452' # 0xf2 -> CYRILLIC SMALL LETTER DJE - u'\u0453' # 0xf3 -> CYRILLIC SMALL LETTER GJE - u'\u0454' # 0xf4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0455' # 0xf5 -> CYRILLIC SMALL LETTER DZE - u'\u0456' # 0xf6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xf7 -> CYRILLIC SMALL LETTER YI - u'\u0458' # 0xf8 -> CYRILLIC SMALL LETTER JE - u'\u0459' # 0xf9 -> CYRILLIC SMALL LETTER LJE - u'\u045a' # 0xfa -> CYRILLIC SMALL LETTER NJE - u'\u045b' # 0xfb -> CYRILLIC SMALL LETTER TSHE - u'\u045c' # 0xfc -> CYRILLIC SMALL LETTER KJE - u'\xa7' # 0xfd -> SECTION SIGN - u'\u045e' # 0xfe -> CYRILLIC SMALL LETTER SHORT U - u'\u045f' # 0xff -> CYRILLIC SMALL LETTER DZHE + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0401' # 0xA1 -> CYRILLIC CAPITAL LETTER IO + u'\u0402' # 0xA2 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0xA3 -> CYRILLIC CAPITAL LETTER GJE + u'\u0404' # 0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0405' # 0xA5 -> CYRILLIC CAPITAL LETTER DZE + u'\u0406' # 0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0xA7 -> CYRILLIC CAPITAL LETTER YI + u'\u0408' # 0xA8 -> CYRILLIC CAPITAL LETTER JE + u'\u0409' # 0xA9 -> CYRILLIC CAPITAL LETTER LJE + u'\u040a' # 0xAA -> CYRILLIC CAPITAL LETTER NJE + u'\u040b' # 0xAB -> CYRILLIC CAPITAL LETTER TSHE + u'\u040c' # 0xAC -> CYRILLIC CAPITAL LETTER KJE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u040e' # 0xAE -> CYRILLIC CAPITAL LETTER SHORT U + u'\u040f' # 0xAF -> CYRILLIC CAPITAL LETTER DZHE + u'\u0410' # 0xB0 -> CYRILLIC CAPITAL 
LETTER A + u'\u0411' # 0xB1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0xB2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0xB3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0xB4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xB5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0xB6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0xB7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0xB8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xB9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xBA -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xBB -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xBC -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xBD -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xBE -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xBF -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0xC0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xC1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xC2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xC3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0xC4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0xC5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0xC6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0xC7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0xC8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0xC9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0xCB -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0xCD -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0xCE -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0xCF -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0xD0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xD1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xD2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xD3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xD4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xD5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xD7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xD8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xD9 -> 
CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xDA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xDB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xDC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xDD -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xDE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xDF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xE0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xE1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xE2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xE3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xE4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xE5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xE6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xE7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xE8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xE9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xEA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xEB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xEC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xED -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xEE -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0xEF -> CYRILLIC SMALL LETTER YA + u'\u2116' # 0xF0 -> NUMERO SIGN + u'\u0451' # 0xF1 -> CYRILLIC SMALL LETTER IO + u'\u0452' # 0xF2 -> CYRILLIC SMALL LETTER DJE + u'\u0453' # 0xF3 -> CYRILLIC SMALL LETTER GJE + u'\u0454' # 0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0455' # 0xF5 -> CYRILLIC SMALL LETTER DZE + u'\u0456' # 0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0xF7 -> CYRILLIC SMALL LETTER YI + u'\u0458' # 0xF8 -> CYRILLIC SMALL LETTER JE + u'\u0459' # 0xF9 -> CYRILLIC SMALL LETTER LJE + u'\u045a' # 0xFA -> CYRILLIC SMALL LETTER NJE + u'\u045b' # 0xFB -> CYRILLIC SMALL LETTER TSHE + u'\u045c' # 0xFC -> CYRILLIC SMALL LETTER KJE + u'\xa7' # 0xFD -> SECTION SIGN + u'\u045e' # 0xFE -> CYRILLIC SMALL LETTER SHORT U + u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # 
LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 
0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL 
LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a7: 0xfd, # SECTION SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x0401: 0xa1, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0xa2, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0xa3, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xa4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xa5, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xa6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xa7, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xa8, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0xa9, # CYRILLIC CAPITAL LETTER LJE - 0x040a: 0xaa, # CYRILLIC CAPITAL LETTER NJE - 0x040b: 0xab, # CYRILLIC CAPITAL LETTER TSHE - 0x040c: 
0xac, # CYRILLIC CAPITAL LETTER KJE - 0x040e: 0xae, # CYRILLIC CAPITAL LETTER SHORT U - 0x040f: 0xaf, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0xb0, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xb1, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xb2, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xb3, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xb4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xb5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xb6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xb7, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xb8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xb9, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0xba, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0xbb, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0xbc, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0xbd, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0xbe, # CYRILLIC CAPITAL LETTER O - 0x041f: 0xbf, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xc0, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xc1, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xc2, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xc3, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xc4, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xc5, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xc6, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xc7, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xc8, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xc9, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0xca, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0xcb, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0xcc, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0xcd, # CYRILLIC CAPITAL LETTER E - 0x042e: 0xce, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0xcf, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xd0, # CYRILLIC SMALL LETTER A - 0x0431: 0xd1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xd2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xd3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xd4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xd5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xd6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xd7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xd8, # CYRILLIC SMALL LETTER I - 0x0439: 0xd9, 
# CYRILLIC SMALL LETTER SHORT I - 0x043a: 0xda, # CYRILLIC SMALL LETTER KA - 0x043b: 0xdb, # CYRILLIC SMALL LETTER EL - 0x043c: 0xdc, # CYRILLIC SMALL LETTER EM - 0x043d: 0xdd, # CYRILLIC SMALL LETTER EN - 0x043e: 0xde, # CYRILLIC SMALL LETTER O - 0x043f: 0xdf, # CYRILLIC SMALL LETTER PE - 0x0440: 0xe0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xe1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xe2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xe3, # CYRILLIC SMALL LETTER U - 0x0444: 0xe4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xe5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xe6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xe7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xe8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xe9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0xea, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0xeb, # CYRILLIC SMALL LETTER YERU - 0x044c: 0xec, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0xed, # CYRILLIC SMALL LETTER E - 0x044e: 0xee, # CYRILLIC SMALL LETTER YU - 0x044f: 0xef, # CYRILLIC SMALL LETTER YA - 0x0451: 0xf1, # CYRILLIC SMALL LETTER IO - 0x0452: 0xf2, # CYRILLIC SMALL LETTER DJE - 0x0453: 0xf3, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xf4, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xf5, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xf6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xf7, # CYRILLIC SMALL LETTER YI - 0x0458: 0xf8, # CYRILLIC SMALL LETTER JE - 0x0459: 0xf9, # CYRILLIC SMALL LETTER LJE - 0x045a: 0xfa, # CYRILLIC SMALL LETTER NJE - 0x045b: 0xfb, # CYRILLIC SMALL LETTER TSHE - 0x045c: 0xfc, # CYRILLIC SMALL LETTER KJE - 0x045e: 0xfe, # CYRILLIC SMALL LETTER SHORT U - 0x045f: 0xff, # CYRILLIC SMALL LETTER DZHE - 0x2116: 0xf0, # NUMERO SIGN -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A7: 0xFD, # SECTION SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x0401: 0xA1, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0xA2, # CYRILLIC CAPITAL LETTER DJE 
+ 0x0403: 0xA3, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0xA4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0xA5, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0xA6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xA7, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0xA8, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0xA9, # CYRILLIC CAPITAL LETTER LJE + 0x040A: 0xAA, # CYRILLIC CAPITAL LETTER NJE + 0x040B: 0xAB, # CYRILLIC CAPITAL LETTER TSHE + 0x040C: 0xAC, # CYRILLIC CAPITAL LETTER KJE + 0x040E: 0xAE, # CYRILLIC CAPITAL LETTER SHORT U + 0x040F: 0xAF, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0xB0, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xB1, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xB2, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xB3, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xB4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xB5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xB6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xB7, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xB8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xB9, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xBA, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xBB, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xBC, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xBD, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0xBE, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xBF, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xC0, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xC1, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xC2, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xC3, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xC4, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xC5, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xC6, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xC7, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xC8, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xC9, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xCA, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xCB, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xCC, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0xCD, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xCE, # CYRILLIC CAPITAL 
LETTER YU + 0x042F: 0xCF, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xD0, # CYRILLIC SMALL LETTER A + 0x0431: 0xD1, # CYRILLIC SMALL LETTER BE + 0x0432: 0xD2, # CYRILLIC SMALL LETTER VE + 0x0433: 0xD3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xD4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xD5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xD7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xD8, # CYRILLIC SMALL LETTER I + 0x0439: 0xD9, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xDA, # CYRILLIC SMALL LETTER KA + 0x043B: 0xDB, # CYRILLIC SMALL LETTER EL + 0x043C: 0xDC, # CYRILLIC SMALL LETTER EM + 0x043D: 0xDD, # CYRILLIC SMALL LETTER EN + 0x043E: 0xDE, # CYRILLIC SMALL LETTER O + 0x043F: 0xDF, # CYRILLIC SMALL LETTER PE + 0x0440: 0xE0, # CYRILLIC SMALL LETTER ER + 0x0441: 0xE1, # CYRILLIC SMALL LETTER ES + 0x0442: 0xE2, # CYRILLIC SMALL LETTER TE + 0x0443: 0xE3, # CYRILLIC SMALL LETTER U + 0x0444: 0xE4, # CYRILLIC SMALL LETTER EF + 0x0445: 0xE5, # CYRILLIC SMALL LETTER HA + 0x0446: 0xE6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xE7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xE8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xE9, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xEA, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xEB, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xEC, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xED, # CYRILLIC SMALL LETTER E + 0x044E: 0xEE, # CYRILLIC SMALL LETTER YU + 0x044F: 0xEF, # CYRILLIC SMALL LETTER YA + 0x0451: 0xF1, # CYRILLIC SMALL LETTER IO + 0x0452: 0xF2, # CYRILLIC SMALL LETTER DJE + 0x0453: 0xF3, # CYRILLIC SMALL LETTER GJE + 0x0454: 0xF4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0xF5, # CYRILLIC SMALL LETTER DZE + 0x0456: 0xF6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xF7, # CYRILLIC SMALL LETTER YI + 0x0458: 0xF8, # CYRILLIC SMALL LETTER JE + 0x0459: 0xF9, # CYRILLIC SMALL LETTER LJE + 0x045A: 0xFA, # CYRILLIC SMALL LETTER NJE + 0x045B: 0xFB, # CYRILLIC SMALL LETTER TSHE + 0x045C: 0xFC, # CYRILLIC SMALL 
LETTER KJE + 0x045E: 0xFE, # CYRILLIC SMALL LETTER SHORT U + 0x045F: 0xFF, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0xF0, # NUMERO SIGN +} + Index: iso8859_6.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_6.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_6.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_6.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,17 +186,17 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - 
u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\ufffe' u'\ufffe' u'\ufffe' - u'\xa4' # 0xa4 -> CURRENCY SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN u'\ufffe' u'\ufffe' u'\ufffe' @@ -204,8 +204,8 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u060c' # 0xac -> ARABIC COMMA - u'\xad' # 0xad -> SOFT HYPHEN + u'\u060c' # 0xAC -> ARABIC COMMA + u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' u'\ufffe' u'\ufffe' @@ -219,62 +219,62 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u061b' # 0xbb -> ARABIC SEMICOLON + u'\u061b' # 0xBB -> ARABIC SEMICOLON u'\ufffe' u'\ufffe' u'\ufffe' - u'\u061f' # 0xbf -> ARABIC QUESTION MARK + u'\u061f' # 0xBF -> ARABIC QUESTION MARK u'\ufffe' - u'\u0621' # 0xc1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xc2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xc3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xc4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xc5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xc6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xc7 -> ARABIC LETTER ALEF - u'\u0628' # 0xc8 -> ARABIC LETTER BEH - u'\u0629' # 0xc9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xca -> ARABIC LETTER TEH - u'\u062b' # 0xcb -> ARABIC LETTER THEH - u'\u062c' # 0xcc -> ARABIC LETTER JEEM - u'\u062d' # 0xcd -> ARABIC LETTER HAH - u'\u062e' # 0xce -> ARABIC LETTER KHAH - u'\u062f' # 0xcf -> ARABIC LETTER DAL - u'\u0630' # 0xd0 -> ARABIC LETTER THAL - u'\u0631' # 0xd1 -> ARABIC LETTER REH - u'\u0632' # 0xd2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xd3 -> ARABIC LETTER SEEN - u'\u0634' # 0xd4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xd5 -> ARABIC LETTER SAD - u'\u0636' # 0xd6 -> ARABIC LETTER DAD - u'\u0637' # 0xd7 -> ARABIC LETTER TAH - u'\u0638' # 0xd8 -> ARABIC LETTER ZAH - u'\u0639' # 0xd9 -> ARABIC LETTER AIN - u'\u063a' # 0xda -> ARABIC LETTER GHAIN + 
u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\u0637' # 0xD7 -> ARABIC LETTER TAH + u'\u0638' # 0xD8 -> ARABIC LETTER ZAH + u'\u0639' # 0xD9 -> ARABIC LETTER AIN + u'\u063a' # 0xDA -> ARABIC LETTER GHAIN u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' - u'\u0640' # 0xe0 -> ARABIC TATWEEL - u'\u0641' # 0xe1 -> ARABIC LETTER FEH - u'\u0642' # 0xe2 -> ARABIC LETTER QAF - u'\u0643' # 0xe3 -> ARABIC LETTER KAF - u'\u0644' # 0xe4 -> ARABIC LETTER LAM - u'\u0645' # 0xe5 -> ARABIC LETTER MEEM - u'\u0646' # 0xe6 -> ARABIC LETTER NOON - u'\u0647' # 0xe7 -> ARABIC LETTER HEH - u'\u0648' # 0xe8 -> ARABIC LETTER WAW - u'\u0649' # 0xe9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xea -> ARABIC LETTER YEH - u'\u064b' # 0xeb -> ARABIC FATHATAN - u'\u064c' # 0xec -> ARABIC DAMMATAN - u'\u064d' # 0xed -> ARABIC KASRATAN - u'\u064e' # 0xee -> ARABIC FATHA - u'\u064f' # 0xef -> ARABIC DAMMA - u'\u0650' # 0xf0 -> ARABIC KASRA - u'\u0651' # 0xf1 -> ARABIC SHADDA - u'\u0652' # 0xf2 -> ARABIC SUKUN + u'\u0640' # 0xE0 -> ARABIC TATWEEL + u'\u0641' # 0xE1 -> 
ARABIC LETTER FEH + u'\u0642' # 0xE2 -> ARABIC LETTER QAF + u'\u0643' # 0xE3 -> ARABIC LETTER KAF + u'\u0644' # 0xE4 -> ARABIC LETTER LAM + u'\u0645' # 0xE5 -> ARABIC LETTER MEEM + u'\u0646' # 0xE6 -> ARABIC LETTER NOON + u'\u0647' # 0xE7 -> ARABIC LETTER HEH + u'\u0648' # 0xE8 -> ARABIC LETTER WAW + u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xEA -> ARABIC LETTER YEH + u'\u064b' # 0xEB -> ARABIC FATHATAN + u'\u064c' # 0xEC -> ARABIC DAMMATAN + u'\u064d' # 0xED -> ARABIC KASRATAN + u'\u064e' # 0xEE -> ARABIC FATHA + u'\u064f' # 0xEF -> ARABIC DAMMA + u'\u0650' # 0xF0 -> ARABIC KASRA + u'\u0651' # 0xF1 -> ARABIC SHADDA + u'\u0652' # 0xF2 -> ARABIC SUKUN u'\ufffe' u'\ufffe' u'\ufffe' @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # 
ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE 
BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 
0x92, # @@ -447,61 +447,62 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x060c: 0xac, # ARABIC COMMA - 0x061b: 0xbb, # ARABIC SEMICOLON - 0x061f: 0xbf, # ARABIC QUESTION MARK - 0x0621: 0xc1, # ARABIC LETTER HAMZA - 0x0622: 0xc2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xc3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xc4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xc5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xc6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xc7, # ARABIC LETTER ALEF - 0x0628: 0xc8, # ARABIC LETTER BEH - 0x0629: 0xc9, # ARABIC LETTER TEH MARBUTA - 0x062a: 0xca, # ARABIC LETTER TEH - 0x062b: 0xcb, # ARABIC LETTER THEH - 0x062c: 0xcc, # ARABIC LETTER JEEM - 0x062d: 0xcd, # ARABIC LETTER HAH - 0x062e: 0xce, # ARABIC LETTER KHAH - 0x062f: 0xcf, # ARABIC LETTER DAL - 0x0630: 0xd0, # ARABIC LETTER THAL - 0x0631: 0xd1, # ARABIC LETTER REH - 0x0632: 0xd2, # ARABIC LETTER ZAIN - 0x0633: 0xd3, # ARABIC LETTER SEEN - 0x0634: 0xd4, # ARABIC LETTER SHEEN - 0x0635: 0xd5, # ARABIC LETTER SAD - 0x0636: 0xd6, # ARABIC LETTER DAD - 0x0637: 0xd7, # ARABIC LETTER TAH - 0x0638: 0xd8, # ARABIC LETTER ZAH - 0x0639: 0xd9, # ARABIC LETTER AIN - 0x063a: 0xda, # ARABIC LETTER GHAIN - 0x0640: 0xe0, # ARABIC TATWEEL - 0x0641: 0xe1, # ARABIC LETTER FEH - 0x0642: 0xe2, # ARABIC LETTER QAF - 0x0643: 0xe3, # ARABIC LETTER KAF - 0x0644: 0xe4, # ARABIC LETTER LAM - 0x0645: 0xe5, # ARABIC LETTER MEEM - 0x0646: 0xe6, # ARABIC LETTER NOON - 0x0647: 0xe7, # ARABIC LETTER HEH - 0x0648: 0xe8, # ARABIC LETTER WAW - 0x0649: 0xe9, # ARABIC LETTER ALEF MAKSURA - 0x064a: 0xea, # ARABIC LETTER YEH - 0x064b: 0xeb, # ARABIC FATHATAN - 0x064c: 0xec, # ARABIC DAMMATAN - 0x064d: 0xed, # ARABIC KASRATAN - 0x064e: 0xee, # ARABIC FATHA - 0x064f: 0xef, # ARABIC DAMMA 
- 0x0650: 0xf0, # ARABIC KASRA - 0x0651: 0xf1, # ARABIC SHADDA - 0x0652: 0xf2, # ARABIC SUKUN -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x060C: 0xAC, # ARABIC COMMA + 0x061B: 0xBB, # ARABIC SEMICOLON + 0x061F: 0xBF, # ARABIC QUESTION MARK + 0x0621: 0xC1, # ARABIC LETTER HAMZA + 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0xC7, # ARABIC LETTER ALEF + 0x0628: 0xC8, # ARABIC LETTER BEH + 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA + 0x062A: 0xCA, # ARABIC LETTER TEH + 0x062B: 0xCB, # ARABIC LETTER THEH + 0x062C: 0xCC, # ARABIC LETTER JEEM + 0x062D: 0xCD, # ARABIC LETTER HAH + 0x062E: 0xCE, # ARABIC LETTER KHAH + 0x062F: 0xCF, # ARABIC LETTER DAL + 0x0630: 0xD0, # ARABIC LETTER THAL + 0x0631: 0xD1, # ARABIC LETTER REH + 0x0632: 0xD2, # ARABIC LETTER ZAIN + 0x0633: 0xD3, # ARABIC LETTER SEEN + 0x0634: 0xD4, # ARABIC LETTER SHEEN + 0x0635: 0xD5, # ARABIC LETTER SAD + 0x0636: 0xD6, # ARABIC LETTER DAD + 0x0637: 0xD7, # ARABIC LETTER TAH + 0x0638: 0xD8, # ARABIC LETTER ZAH + 0x0639: 0xD9, # ARABIC LETTER AIN + 0x063A: 0xDA, # ARABIC LETTER GHAIN + 0x0640: 0xE0, # ARABIC TATWEEL + 0x0641: 0xE1, # ARABIC LETTER FEH + 0x0642: 0xE2, # ARABIC LETTER QAF + 0x0643: 0xE3, # ARABIC LETTER KAF + 0x0644: 0xE4, # ARABIC LETTER LAM + 0x0645: 0xE5, # ARABIC LETTER MEEM + 0x0646: 0xE6, # ARABIC LETTER NOON + 0x0647: 0xE7, # ARABIC LETTER HEH + 0x0648: 0xE8, # ARABIC LETTER WAW + 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA + 0x064A: 0xEA, # ARABIC LETTER YEH + 0x064B: 0xEB, # ARABIC FATHATAN + 0x064C: 0xEC, # ARABIC DAMMATAN + 0x064D: 0xED, # ARABIC KASRATAN + 0x064E: 0xEE, 
# ARABIC FATHA + 0x064F: 0xEF, # ARABIC DAMMA + 0x0650: 0xF0, # ARABIC KASRA + 0x0651: 0xF1, # ARABIC SHADDA + 0x0652: 0xF2, # ARABIC SUKUN +} + Index: iso8859_7.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_7.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_7.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_7.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,107 +186,107 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - 
u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\u2018' # 0xa1 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xa2 -> RIGHT SINGLE QUOTATION MARK - u'\xa3' # 0xa3 -> POUND SIGN - u'\u20ac' # 0xa4 -> EURO SIGN - u'\u20af' # 0xa5 -> DRACHMA SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u037a' # 0xaa -> GREEK YPOGEGRAMMENI - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u2018' # 0xA1 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xA2 -> RIGHT SINGLE QUOTATION MARK + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20ac' # 0xA4 -> EURO SIGN + u'\u20af' # 0xA5 -> DRACHMA SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u037a' # 0xAA -> GREEK YPOGEGRAMMENI + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' - u'\u2015' # 0xaf -> HORIZONTAL BAR - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\u0384' # 0xb4 -> GREEK TONOS - u'\u0385' # 0xb5 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xb6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\u0388' # 0xb8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xb9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xba -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xbc -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xbe -> GREEK CAPITAL LETTER UPSILON WITH TONOS 
- u'\u038f' # 0xbf -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xc0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xc1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xc2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xc3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xc4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xc5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xc6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xc7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xc8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xc9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xca -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xcb -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xcc -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xcd -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xce -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xcf -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0xd0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xd1 -> GREEK CAPITAL LETTER RHO + u'\u2015' # 0xAF -> HORIZONTAL BAR + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u0384' # 0xB4 -> GREEK TONOS + u'\u0385' # 0xB5 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 
0xC4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU + u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU + u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI + u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO u'\ufffe' - u'\u03a3' # 0xd3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xd4 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xd5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xd6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xd7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xd8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xd9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xda -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xdb -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 0xdc -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xdd -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xde -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xdf -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xe0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xe1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xe2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xe3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xe4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xe5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xe6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xe7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xe8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xe9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xea -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xeb -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xec -> GREEK SMALL LETTER MU - 
u'\u03bd' # 0xed -> GREEK SMALL LETTER NU - u'\u03be' # 0xee -> GREEK SMALL LETTER XI - u'\u03bf' # 0xef -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xf0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xf1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xf2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xf3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xf4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xf5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xf6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xf7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xf8 -> GREEK SMALL LETTER PSI - u'\u03c9' # 0xf9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xfa -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xfb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 0xfc -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xfd -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xfe -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0xE7 -> GREEK SMALL LETTER 
ETA + u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU + u'\u03bd' # 0xED -> GREEK SMALL LETTER NU + u'\u03be' # 0xEE -> GREEK SMALL LETTER XI + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS u'\ufffe' ) @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD 
SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN 
CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 
0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,103 +447,104 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a3: 0xa3, # POUND SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b7: 0xb7, # MIDDLE DOT - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x037a: 0xaa, # GREEK YPOGEGRAMMENI - 0x0384: 0xb4, # GREEK TONOS - 0x0385: 0xb5, # GREEK DIALYTIKA TONOS - 0x0386: 0xb6, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xb8, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xb9, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0xba, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0xbc, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0xbe, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0xbf, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xc0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xc1, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xc2, # GREEK CAPITAL LETTER BETA - 0x0393: 0xc3, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xc4, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xc5, # GREEK CAPITAL 
LETTER EPSILON - 0x0396: 0xc6, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xc7, # GREEK CAPITAL LETTER ETA - 0x0398: 0xc8, # GREEK CAPITAL LETTER THETA - 0x0399: 0xc9, # GREEK CAPITAL LETTER IOTA - 0x039a: 0xca, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0xcb, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0xcc, # GREEK CAPITAL LETTER MU - 0x039d: 0xcd, # GREEK CAPITAL LETTER NU - 0x039e: 0xce, # GREEK CAPITAL LETTER XI - 0x039f: 0xcf, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0xd0, # GREEK CAPITAL LETTER PI - 0x03a1: 0xd1, # GREEK CAPITAL LETTER RHO - 0x03a3: 0xd3, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0xd4, # GREEK CAPITAL LETTER TAU - 0x03a5: 0xd5, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0xd6, # GREEK CAPITAL LETTER PHI - 0x03a7: 0xd7, # GREEK CAPITAL LETTER CHI - 0x03a8: 0xd8, # GREEK CAPITAL LETTER PSI - 0x03a9: 0xd9, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0xda, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0xdb, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0xdc, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0xdd, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0xde, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0xdf, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b0: 0xe0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03b1: 0xe1, # GREEK SMALL LETTER ALPHA - 0x03b2: 0xe2, # GREEK SMALL LETTER BETA - 0x03b3: 0xe3, # GREEK SMALL LETTER GAMMA - 0x03b4: 0xe4, # GREEK SMALL LETTER DELTA - 0x03b5: 0xe5, # GREEK SMALL LETTER EPSILON - 0x03b6: 0xe6, # GREEK SMALL LETTER ZETA - 0x03b7: 0xe7, # GREEK SMALL LETTER ETA - 0x03b8: 0xe8, # GREEK SMALL LETTER THETA - 0x03b9: 0xe9, # GREEK SMALL LETTER IOTA - 0x03ba: 0xea, # GREEK SMALL LETTER KAPPA - 0x03bb: 0xeb, # GREEK SMALL LETTER LAMDA - 0x03bc: 0xec, # GREEK SMALL LETTER MU - 0x03bd: 0xed, # GREEK SMALL LETTER NU - 0x03be: 0xee, # GREEK SMALL LETTER XI - 0x03bf: 0xef, # GREEK SMALL LETTER OMICRON - 0x03c0: 0xf0, # GREEK SMALL LETTER PI - 0x03c1: 0xf1, # GREEK SMALL LETTER RHO - 0x03c2: 0xf2, # GREEK SMALL 
LETTER FINAL SIGMA - 0x03c3: 0xf3, # GREEK SMALL LETTER SIGMA - 0x03c4: 0xf4, # GREEK SMALL LETTER TAU - 0x03c5: 0xf5, # GREEK SMALL LETTER UPSILON - 0x03c6: 0xf6, # GREEK SMALL LETTER PHI - 0x03c7: 0xf7, # GREEK SMALL LETTER CHI - 0x03c8: 0xf8, # GREEK SMALL LETTER PSI - 0x03c9: 0xf9, # GREEK SMALL LETTER OMEGA - 0x03ca: 0xfa, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0xfb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0xfc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0xfd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0xfe, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0xaf, # HORIZONTAL BAR - 0x2018: 0xa1, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xa2, # RIGHT SINGLE QUOTATION MARK - 0x20ac: 0xa4, # EURO SIGN - 0x20af: 0xa5, # DRACHMA SIGN -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B7: 0xB7, # MIDDLE DOT + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x037A: 0xAA, # GREEK YPOGEGRAMMENI + 0x0384: 0xB4, # GREEK TONOS + 0x0385: 0xB5, # GREEK DIALYTIKA TONOS + 0x0386: 0xB6, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 
0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA + 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA + 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA + 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA + 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA + 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA + 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0xCC, # GREEK CAPITAL LETTER MU + 0x039D: 0xCD, # GREEK CAPITAL LETTER NU + 0x039E: 0xCE, # GREEK CAPITAL LETTER XI + 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI + 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO + 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU + 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI + 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI + 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI + 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA + 0x03B2: 0xE2, # GREEK SMALL LETTER BETA + 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA + 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA + 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON + 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA + 0x03B7: 0xE7, # GREEK SMALL LETTER ETA + 0x03B8: 0xE8, # GREEK SMALL LETTER THETA + 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA + 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA + 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA + 
0x03BC: 0xEC, # GREEK SMALL LETTER MU + 0x03BD: 0xED, # GREEK SMALL LETTER NU + 0x03BE: 0xEE, # GREEK SMALL LETTER XI + 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xF0, # GREEK SMALL LETTER PI + 0x03C1: 0xF1, # GREEK SMALL LETTER RHO + 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA + 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA + 0x03C4: 0xF4, # GREEK SMALL LETTER TAU + 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xF6, # GREEK SMALL LETTER PHI + 0x03C7: 0xF7, # GREEK SMALL LETTER CHI + 0x03C8: 0xF8, # GREEK SMALL LETTER PSI + 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA + 0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0xAF, # HORIZONTAL BAR + 0x2018: 0xA1, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xA2, # RIGHT SINGLE QUOTATION MARK + 0x20AC: 0xA4, # EURO SIGN + 0x20AF: 0xA5, # DRACHMA SIGN +} + Index: iso8859_8.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_8.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_8.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_8.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 
+58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,43 +186,43 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\ufffe' - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xd7' # 0xaa -> MULTIPLICATION SIGN - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN 
- u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xf7' # 0xba -> DIVISION SIGN - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xd7' # 0xAA -> MULTIPLICATION SIGN + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xf7' # 0xBA -> DIVISION SIGN + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\ufffe' u'\ufffe' u'\ufffe' @@ -255,38 +255,38 @@ u'\ufffe' u'\ufffe' u'\ufffe' - u'\u2017' # 0xdf -> DOUBLE LOW LINE - u'\u05d0' # 0xe0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xe1 -> HEBREW LETTER BET - u'\u05d2' # 0xe2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xe3 -> HEBREW LETTER DALET - u'\u05d4' # 0xe4 -> HEBREW LETTER HE - u'\u05d5' # 0xe5 
-> HEBREW LETTER VAV - u'\u05d6' # 0xe6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xe7 -> HEBREW LETTER HET - u'\u05d8' # 0xe8 -> HEBREW LETTER TET - u'\u05d9' # 0xe9 -> HEBREW LETTER YOD - u'\u05da' # 0xea -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xeb -> HEBREW LETTER KAF - u'\u05dc' # 0xec -> HEBREW LETTER LAMED - u'\u05dd' # 0xed -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xee -> HEBREW LETTER MEM - u'\u05df' # 0xef -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xf0 -> HEBREW LETTER NUN - u'\u05e1' # 0xf1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xf2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xf3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xf4 -> HEBREW LETTER PE - u'\u05e5' # 0xf5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xf6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xf7 -> HEBREW LETTER QOF - u'\u05e8' # 0xf8 -> HEBREW LETTER RESH - u'\u05e9' # 0xf9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xfa -> HEBREW LETTER TAV + u'\u2017' # 0xDF -> DOUBLE LOW LINE + u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF + u'\u05d1' # 0xE1 -> HEBREW LETTER BET + u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0xE3 -> HEBREW LETTER DALET + u'\u05d4' # 0xE4 -> HEBREW LETTER HE + u'\u05d5' # 0xE5 -> HEBREW LETTER VAV + u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0xE7 -> HEBREW LETTER HET + u'\u05d8' # 0xE8 -> HEBREW LETTER TET + u'\u05d9' # 0xE9 -> HEBREW LETTER YOD + u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF + u'\u05db' # 0xEB -> HEBREW LETTER KAF + u'\u05dc' # 0xEC -> HEBREW LETTER LAMED + u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM + u'\u05de' # 0xEE -> HEBREW LETTER MEM + u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0xF0 -> HEBREW LETTER NUN + u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN + u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0xF4 -> HEBREW LETTER PE + u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI + u'\u05e7' # 0xF7 -> HEBREW LETTER QOF + u'\u05e8' # 0xF8 -> HEBREW LETTER RESH + u'\u05e9' # 
0xF9 -> HEBREW LETTER SHIN + u'\u05ea' # 0xFA -> HEBREW LETTER TAV u'\ufffe' u'\ufffe' - u'\u200e' # 0xfd -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xfe -> RIGHT-TO-LEFT MARK + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK u'\ufffe' ) @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 
0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL 
LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,70 +447,71 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # 
COPYRIGHT SIGN - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00d7: 0xaa, # MULTIPLICATION SIGN - 0x00f7: 0xba, # DIVISION SIGN - 0x05d0: 0xe0, # HEBREW LETTER ALEF - 0x05d1: 0xe1, # HEBREW LETTER BET - 0x05d2: 0xe2, # HEBREW LETTER GIMEL - 0x05d3: 0xe3, # HEBREW LETTER DALET - 0x05d4: 0xe4, # HEBREW LETTER HE - 0x05d5: 0xe5, # HEBREW LETTER VAV - 0x05d6: 0xe6, # HEBREW LETTER ZAYIN - 0x05d7: 0xe7, # HEBREW LETTER HET - 0x05d8: 0xe8, # HEBREW LETTER TET - 0x05d9: 0xe9, # HEBREW LETTER YOD - 0x05da: 0xea, # HEBREW LETTER FINAL KAF - 0x05db: 0xeb, # HEBREW LETTER KAF - 0x05dc: 0xec, # HEBREW LETTER LAMED - 0x05dd: 0xed, # HEBREW LETTER FINAL MEM - 0x05de: 0xee, # HEBREW LETTER MEM - 0x05df: 0xef, # HEBREW LETTER FINAL NUN - 0x05e0: 0xf0, # HEBREW LETTER NUN - 0x05e1: 0xf1, # HEBREW LETTER SAMEKH - 0x05e2: 0xf2, # HEBREW LETTER AYIN - 0x05e3: 0xf3, # HEBREW LETTER FINAL PE - 0x05e4: 0xf4, # HEBREW LETTER PE - 0x05e5: 0xf5, # HEBREW LETTER FINAL TSADI - 0x05e6: 0xf6, # HEBREW LETTER TSADI - 0x05e7: 0xf7, # HEBREW LETTER QOF - 0x05e8: 0xf8, # HEBREW LETTER RESH - 0x05e9: 0xf9, # HEBREW LETTER SHIN - 0x05ea: 0xfa, # HEBREW LETTER TAV - 0x200e: 0xfd, # LEFT-TO-RIGHT MARK - 0x200f: 0xfe, # RIGHT-TO-LEFT MARK - 0x2017: 0xdf, # DOUBLE LOW LINE -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 
0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00D7: 0xAA, # MULTIPLICATION SIGN + 0x00F7: 0xBA, # DIVISION SIGN + 0x05D0: 0xE0, # HEBREW LETTER ALEF + 0x05D1: 0xE1, # HEBREW LETTER BET + 0x05D2: 0xE2, # HEBREW LETTER GIMEL + 0x05D3: 0xE3, # HEBREW LETTER DALET + 0x05D4: 0xE4, # HEBREW LETTER HE + 0x05D5: 0xE5, # HEBREW LETTER VAV + 0x05D6: 0xE6, # HEBREW LETTER ZAYIN + 0x05D7: 0xE7, # HEBREW LETTER HET + 0x05D8: 0xE8, # HEBREW LETTER TET + 0x05D9: 0xE9, # HEBREW LETTER YOD + 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF + 0x05DB: 0xEB, # HEBREW LETTER KAF + 0x05DC: 0xEC, # HEBREW LETTER LAMED + 0x05DD: 0xED, # HEBREW LETTER FINAL MEM + 0x05DE: 0xEE, # HEBREW LETTER MEM + 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN + 0x05E0: 0xF0, # HEBREW LETTER NUN + 0x05E1: 0xF1, # HEBREW LETTER SAMEKH + 0x05E2: 0xF2, # HEBREW LETTER AYIN + 0x05E3: 0xF3, # HEBREW LETTER FINAL PE + 0x05E4: 0xF4, # HEBREW LETTER PE + 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI + 0x05E6: 0xF6, # HEBREW LETTER TSADI + 0x05E7: 0xF7, # HEBREW LETTER QOF + 0x05E8: 0xF8, # 
HEBREW LETTER RESH + 0x05E9: 0xF9, # HEBREW LETTER SHIN + 0x05EA: 0xFA, # HEBREW LETTER TAV + 0x200E: 0xFD, # LEFT-TO-RIGHT MARK + 0x200F: 0xFE, # RIGHT-TO-LEFT MARK + 0x2017: 0xDF, # DOUBLE LOW LINE +} + Index: iso8859_9.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/iso8859_9.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- iso8859_9.py 24 Oct 2005 12:07:48 -0000 1.6 +++ iso8859_9.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,108 +186,108 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - 
u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> - u'\xa0' # 0xa0 -> NO-BREAK SPACE - u'\xa1' # 0xa1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa4' # 0xa4 -> CURRENCY SIGN - u'\xa5' # 0xa5 -> YEN SIGN - u'\xa6' # 0xa6 -> BROKEN BAR - u'\xa7' # 0xa7 -> SECTION SIGN - u'\xa8' # 0xa8 -> DIAERESIS - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\xaa' # 0xaa -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xac -> NOT SIGN - u'\xad' # 0xad -> SOFT HYPHEN - u'\xae' # 0xae -> REGISTERED SIGN - u'\xaf' # 0xaf -> MACRON - u'\xb0' # 0xb0 -> DEGREE SIGN - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\xb2' # 0xb2 -> SUPERSCRIPT TWO - u'\xb3' # 0xb3 -> SUPERSCRIPT THREE - u'\xb4' # 0xb4 -> ACUTE ACCENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\xb6' # 0xb6 -> PILCROW SIGN - u'\xb7' # 0xb7 -> MIDDLE DOT - u'\xb8' # 0xb8 -> CEDILLA - u'\xb9' # 0xb9 -> SUPERSCRIPT ONE - u'\xba' # 0xba -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xbf -> INVERTED QUESTION MARK - u'\xc0' # 0xc0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xc1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xc2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xc3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xc4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xc5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xc6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xc7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xc8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xc9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xca -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xcb -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xcc -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xcd -> LATIN 
CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xce -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xcf -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xd0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xd1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xd2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xd3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xd4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xd5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xd6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xd7 -> MULTIPLICATION SIGN - u'\xd8' # 0xd8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xd9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xda -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xdb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xdc -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xdd -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xde -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xdf' # 0xdf -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xe0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xe1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xe2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xe3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xe4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xe5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xe6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xe7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xe8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xe9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xea -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xeb -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xec -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xed -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xee -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xef -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xf0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xf1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xf2 
-> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xf3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xf4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xf5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xf6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xf7 -> DIVISION SIGN - u'\xf8' # 0xf8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xf9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xfa -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xfb -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xfc -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xfd -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xfe -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xff -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + 
u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL 
LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 
0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL 
LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # 
LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,106 +447,107 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x00a0: 0xa0, # NO-BREAK SPACE - 0x00a1: 0xa1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a4: 0xa4, # CURRENCY SIGN - 0x00a5: 0xa5, # YEN SIGN - 0x00a6: 0xa6, # BROKEN BAR - 0x00a7: 0xa7, # SECTION SIGN - 0x00a8: 0xa8, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xaa, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xac, # NOT SIGN - 0x00ad: 0xad, # SOFT HYPHEN - 0x00ae: 0xae, # REGISTERED SIGN - 0x00af: 0xaf, # MACRON - 0x00b0: 0xb0, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0xb2, # SUPERSCRIPT TWO - 0x00b3: 0xb3, # SUPERSCRIPT THREE - 0x00b4: 0xb4, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xb6, # PILCROW SIGN - 0x00b7: 0xb7, # MIDDLE DOT - 0x00b8: 0xb8, # CEDILLA - 0x00b9: 0xb9, # SUPERSCRIPT ONE - 0x00ba: 0xba, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0xbc, # VULGAR FRACTION 
ONE QUARTER - 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF - 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0xbf, # INVERTED QUESTION MARK - 0x00c0: 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xc1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xc2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xc3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0xc4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0xc5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xc6, # LATIN CAPITAL LETTER AE - 0x00c7: 0xc7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xc8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0xc9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xcb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xcc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xcd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xcf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0xd1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xd2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xd3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xd4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xd5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0xd6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0xd7, # MULTIPLICATION SIGN - 0x00d8: 0xd8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xd9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xda, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xdb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0xdc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xdf, # LATIN SMALL LETTER SHARP S - 0x00e0: 0xe0, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0xe1, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0xe2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0xe3, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0xe4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0xe5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 
0xe6, # LATIN SMALL LETTER AE - 0x00e7: 0xe7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0xe8, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0xe9, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0xea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0xeb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0xec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0xed, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0xee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0xef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0xf1, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0xf2, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0xf3, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0xf4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0xf5, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0xf6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xf7, # DIVISION SIGN - 0x00f8: 0xf8, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0xf9, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0xfa, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0xfb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0xfc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011e: 0xd0, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0xf0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xdd, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xfd, # LATIN SMALL LETTER DOTLESS I - 0x015e: 0xde, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0xfe, # LATIN SMALL LETTER S WITH CEDILLA -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x00A0: 0xA0, # NO-BREAK SPACE + 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A4: 0xA4, # CURRENCY SIGN + 0x00A5: 0xA5, # YEN SIGN + 0x00A6: 0xA6, # BROKEN BAR + 0x00A7: 0xA7, # SECTION SIGN + 0x00A8: 0xA8, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xAB, # LEFT-POINTING 
DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xAC, # NOT SIGN + 0x00AD: 0xAD, # SOFT HYPHEN + 0x00AE: 0xAE, # REGISTERED SIGN + 0x00AF: 0xAF, # MACRON + 0x00B0: 0xB0, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0xB2, # SUPERSCRIPT TWO + 0x00B3: 0xB3, # SUPERSCRIPT THREE + 0x00B4: 0xB4, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xB6, # PILCROW SIGN + 0x00B7: 0xB7, # MIDDLE DOT + 0x00B8: 0xB8, # CEDILLA + 0x00B9: 0xB9, # SUPERSCRIPT ONE + 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER + 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF + 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS + 0x00BF: 0xBF, # INVERTED QUESTION MARK + 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE + 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D7: 0xD7, # MULTIPLICATION SIGN + 0x00D8: 0xD8, # LATIN CAPITAL LETTER O 
WITH STROKE + 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S + 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xE6, # LATIN SMALL LETTER AE + 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xF7, # DIVISION SIGN + 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I + 0x015E: 0xDE, # LATIN CAPITAL LETTER S 
WITH CEDILLA + 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA +} + Index: koi8_r.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/koi8_r.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- koi8_r.py 24 Oct 2005 12:07:48 -0000 1.6 +++ koi8_r.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' 
# 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' 
# 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT @@ -170,12 +170,12 @@ u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8a -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8b -> UPPER HALF BLOCK - u'\u2584' # 0x8c -> LOWER HALF BLOCK - u'\u2588' # 0x8d -> FULL BLOCK - u'\u258c' # 0x8e -> LEFT HALF BLOCK - u'\u2590' # 0x8f -> RIGHT HALF BLOCK + u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x8B -> UPPER HALF BLOCK + u'\u2584' # 0x8C 
-> LOWER HALF BLOCK + u'\u2588' # 0x8D -> FULL BLOCK + u'\u258c' # 0x8E -> LEFT HALF BLOCK + u'\u2590' # 0x8F -> RIGHT HALF BLOCK u'\u2591' # 0x90 -> LIGHT SHADE u'\u2592' # 0x91 -> MEDIUM SHADE u'\u2593' # 0x92 -> DARK SHADE @@ -186,108 +186,108 @@ u'\u2248' # 0x97 -> ALMOST EQUAL TO u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9a -> NO-BREAK SPACE - u'\u2321' # 0x9b -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9c -> DEGREE SIGN - u'\xb2' # 0x9d -> SUPERSCRIPT TWO - u'\xb7' # 0x9e -> MIDDLE DOT - u'\xf7' # 0x9f -> DIVISION SIGN - u'\u2550' # 0xa0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xa1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xa2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u0451' # 0xa3 -> CYRILLIC SMALL LETTER IO - u'\u2553' # 0xa4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u2554' # 0xa5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2555' # 0xa6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2556' # 0xa7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2557' # 0xa8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xa9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xaa -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xab -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xac -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u255c' # 0xad -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255d' # 0xae -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xaf -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xb0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xb1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xb2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xb3 -> CYRILLIC CAPITAL LETTER IO - u'\u2562' # 0xb4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2563' # 0xb5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2564' # 0xb6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0xb7 -> BOX 
DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2566' # 0xb8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xb9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xba -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xbb -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xbc -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u256b' # 0xbd -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256c' # 0xbe -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xbf -> COPYRIGHT SIGN - u'\u044e' # 0xc0 -> CYRILLIC SMALL LETTER YU - u'\u0430' # 0xc1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xc2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xc3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' # 0xc4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xc5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xc6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xc7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xc8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xc9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xca -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xcb -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xcc -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xcd -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xce -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xcf -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xd0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xd1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xd2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xd3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xd4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xd5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xd6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xd7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xd8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xd9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xda -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xdb -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xdc -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xdd -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xde -> 
CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xdf -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xe0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xe1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xe2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xe3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xe4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xe5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xe6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xe7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xe8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xe9 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xea -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xeb -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xec -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xed -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xee -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xef -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xf0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xf1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xf2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xf3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xf4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xf5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xf6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xf7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xf8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xf9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xfa -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xfb -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xfc -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xfd -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xfe -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xff -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\xa0' # 0x9A -> NO-BREAK SPACE + u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x9C -> DEGREE SIGN + u'\xb2' # 0x9D -> SUPERSCRIPT TWO + u'\xb7' # 0x9E -> MIDDLE DOT + u'\xf7' # 0x9F -> DIVISION SIGN + u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' 
# 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO + u'\u2553' # 0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2555' # 0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2556' # 0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u255c' # 0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO + u'\u2562' # 0xB4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2564' # 0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u256b' # 0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa9' # 0xBF -> COPYRIGHT SIGN + u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0xC3 -> 
CYRILLIC SMALL LETTER TSE + u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E + u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF + u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xEC -> CYRILLIC CAPITAL 
LETTER EL + u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # 
RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 
+383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0x9a, # NO-BREAK SPACE - 0x00a9: 0xbf, # COPYRIGHT SIGN - 0x00b0: 0x9c, # DEGREE SIGN - 0x00b2: 0x9d, # SUPERSCRIPT TWO - 0x00b7: 0x9e, # MIDDLE DOT - 0x00f7: 0x9f, # DIVISION SIGN - 0x0401: 0xb3, # CYRILLIC CAPITAL LETTER IO - 0x0410: 0xe1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xe2, 
# CYRILLIC CAPITAL LETTER BE - 0x0412: 0xf7, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xe7, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xe4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xe5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xf6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xfa, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xe9, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xea, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0xeb, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0xec, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0xed, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0xee, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0xef, # CYRILLIC CAPITAL LETTER O - 0x041f: 0xf0, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xf2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xf3, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xf4, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xf5, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xe6, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xe8, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xe3, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xfe, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xfb, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xfd, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0xff, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0xf9, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0xf8, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0xfc, # CYRILLIC CAPITAL LETTER E - 0x042e: 0xe0, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0xf1, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xc1, # CYRILLIC SMALL LETTER A - 0x0431: 0xc2, # CYRILLIC SMALL LETTER BE - 0x0432: 0xd7, # CYRILLIC SMALL LETTER VE - 0x0433: 0xc7, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xc4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xc5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xd6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xda, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xc9, # CYRILLIC SMALL LETTER I - 0x0439: 0xca, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0xcb, # CYRILLIC SMALL LETTER KA - 0x043b: 0xcc, # CYRILLIC SMALL LETTER EL - 0x043c: 0xcd, # CYRILLIC SMALL LETTER EM - 0x043d: 0xce, # CYRILLIC 
SMALL LETTER EN - 0x043e: 0xcf, # CYRILLIC SMALL LETTER O - 0x043f: 0xd0, # CYRILLIC SMALL LETTER PE - 0x0440: 0xd2, # CYRILLIC SMALL LETTER ER - 0x0441: 0xd3, # CYRILLIC SMALL LETTER ES - 0x0442: 0xd4, # CYRILLIC SMALL LETTER TE - 0x0443: 0xd5, # CYRILLIC SMALL LETTER U - 0x0444: 0xc6, # CYRILLIC SMALL LETTER EF - 0x0445: 0xc8, # CYRILLIC SMALL LETTER HA - 0x0446: 0xc3, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xde, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xdb, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xdd, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0xdf, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0xd9, # CYRILLIC SMALL LETTER YERU - 0x044c: 0xd8, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0xdc, # CYRILLIC SMALL LETTER E - 0x044e: 0xc0, # CYRILLIC SMALL LETTER YU - 0x044f: 0xd1, # CYRILLIC SMALL LETTER YA - 0x0451: 0xa3, # CYRILLIC SMALL LETTER IO + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0x9A, # NO-BREAK SPACE + 0x00A9: 0xBF, # COPYRIGHT SIGN + 0x00B0: 0x9C, # DEGREE SIGN + 0x00B2: 0x9D, # SUPERSCRIPT TWO + 0x00B7: 0x9E, # MIDDLE DOT + 0x00F7: 0x9F, # DIVISION SIGN + 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO + 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xF0, # CYRILLIC CAPITAL 
LETTER PE + 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xC1, # CYRILLIC SMALL LETTER A + 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE + 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE + 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xC9, # CYRILLIC SMALL LETTER I + 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA + 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL + 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM + 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN + 0x043E: 0xCF, # CYRILLIC SMALL LETTER O + 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE + 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER + 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES + 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE + 0x0443: 0xD5, # CYRILLIC SMALL LETTER U + 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF + 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA + 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xD8, # CYRILLIC 
SMALL LETTER SOFT SIGN + 0x044D: 0xDC, # CYRILLIC SMALL LETTER E + 0x044E: 0xC0, # CYRILLIC SMALL LETTER YU + 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA + 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO 0x2219: 0x95, # BULLET OPERATOR - 0x221a: 0x96, # SQUARE ROOT + 0x221A: 0x96, # SQUARE ROOT 0x2248: 0x97, # ALMOST EQUAL TO 0x2264: 0x98, # LESS-THAN OR EQUAL TO 0x2265: 0x99, # GREATER-THAN OR EQUAL TO 0x2320: 0x93, # TOP HALF INTEGRAL - 0x2321: 0x9b, # BOTTOM HALF INTEGRAL + 0x2321: 0x9B, # BOTTOM HALF INTEGRAL 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x8a, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xa0, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xa1, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0xa2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0xa4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0xa5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0xa6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0xa7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0xa8, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0xa9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0xaa, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0xab, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0xac, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0xad, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0xae, # BOX DRAWINGS DOUBLE 
UP AND LEFT - 0x255e: 0xaf, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0xb0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0xb1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0xb2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0xb4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0xb5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0xb6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0xb7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0xb8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0xb9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0xba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0xbb, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0xbc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0xbd, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0xbe, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x8b, # UPPER HALF BLOCK - 0x2584: 0x8c, # LOWER HALF BLOCK - 0x2588: 0x8d, # FULL BLOCK - 0x258c: 0x8e, # LEFT HALF BLOCK - 0x2590: 0x8f, # RIGHT HALF BLOCK + 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0xA4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0xA6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0xA7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255C: 0xAD, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND 
RIGHT DOUBLE + 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0xB4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0xB6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0xB7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256B: 0xBD, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x8B, # UPPER HALF BLOCK + 0x2584: 0x8C, # LOWER HALF BLOCK + 0x2588: 0x8D, # FULL BLOCK + 0x258C: 0x8E, # LEFT HALF BLOCK + 0x2590: 0x8F, # RIGHT HALF BLOCK 0x2591: 0x90, # LIGHT SHADE 0x2592: 0x91, # MEDIUM SHADE 0x2593: 0x92, # DARK SHADE - 0x25a0: 0x94, # BLACK SQUARE -} \ No newline at end of file + 0x25A0: 0x94, # BLACK SQUARE +} + Index: koi8_u.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/koi8_u.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- koi8_u.py 24 Oct 2005 12:07:48 -0000 1.3 +++ koi8_u.py 24 Oct 2005 12:14:59 -0000 1.4 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E 
-> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT @@ -170,12 +170,12 @@ u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8a -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8b -> UPPER HALF BLOCK - u'\u2584' # 0x8c -> LOWER HALF BLOCK - u'\u2588' # 0x8d -> FULL BLOCK - u'\u258c' # 0x8e -> LEFT HALF BLOCK - u'\u2590' # 0x8f -> RIGHT HALF BLOCK + u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x8B -> UPPER HALF BLOCK + u'\u2584' # 0x8C -> LOWER HALF BLOCK + u'\u2588' # 0x8D -> FULL BLOCK + u'\u258c' # 0x8E -> LEFT HALF BLOCK + u'\u2590' # 0x8F -> RIGHT HALF BLOCK u'\u2591' # 0x90 -> LIGHT SHADE u'\u2592' # 0x91 -> MEDIUM SHADE u'\u2593' # 0x92 -> DARK SHADE @@ -186,108 +186,108 @@ u'\u2248' # 0x97 -> ALMOST EQUAL TO u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9a -> NO-BREAK SPACE - u'\u2321' # 0x9b -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9c -> DEGREE SIGN - u'\xb2' # 0x9d -> SUPERSCRIPT TWO - u'\xb7' # 0x9e -> MIDDLE DOT 
- u'\xf7' # 0x9f -> DIVISION SIGN - u'\u2550' # 0xa0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xa1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xa2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u0451' # 0xa3 -> CYRILLIC SMALL LETTER IO - u'\u0454' # 0xa4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u2554' # 0xa5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u0456' # 0xa6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xa7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) - u'\u2557' # 0xa8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xa9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xaa -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xab -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xac -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u0491' # 0xad -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN - u'\u255d' # 0xae -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xaf -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xb0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xb1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xb2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xb3 -> CYRILLIC CAPITAL LETTER IO - u'\u0404' # 0xb4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u2563' # 0xb5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u0406' # 0xb6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xb7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - u'\u2566' # 0xb8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xb9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xba -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xbb -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xbc -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u0490' # 0xbd -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN - u'\u256c' # 0xbe -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xbf -> COPYRIGHT SIGN - u'\u044e' # 0xc0 -> 
CYRILLIC SMALL LETTER YU - u'\u0430' # 0xc1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xc2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xc3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' # 0xc4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xc5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xc6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xc7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xc8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xc9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xca -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xcb -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xcc -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xcd -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xce -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xcf -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xd0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xd1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xd2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xd3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xd4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xd5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xd6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xd7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xd8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xd9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xda -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xdb -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xdc -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xdd -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xde -> CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xdf -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xe0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xe1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xe2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xe3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xe4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xe5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xe6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xe7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xe8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xe9 -> CYRILLIC CAPITAL LETTER I - 
u'\u0419' # 0xea -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xeb -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xec -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xed -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xee -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xef -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xf0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xf1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xf2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xf3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xf4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xf5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xf6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xf7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xf8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xf9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xfa -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xfb -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xfc -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xfd -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xfe -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xff -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\xa0' # 0x9A -> NO-BREAK SPACE + u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x9C -> DEGREE SIGN + u'\xb2' # 0x9D -> SUPERSCRIPT TWO + u'\xb7' # 0x9E -> MIDDLE DOT + u'\xf7' # 0x9F -> DIVISION SIGN + u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO + u'\u0454' # 0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u0456' # 0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) + u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND 
RIGHT + u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u0491' # 0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN + u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO + u'\u0404' # 0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u0406' # 0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) + u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u0490' # 0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN + u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa9' # 0xBF -> COPYRIGHT SIGN + u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xCF -> CYRILLIC 
SMALL LETTER O + u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E + u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF + u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0xF8 -> CYRILLIC 
CAPITAL LETTER SOFT SIGN + u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # 
SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # 
GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE - 0x00a0: 0x9a, # NO-BREAK SPACE - 0x00a9: 0xbf, # COPYRIGHT SIGN - 0x00b0: 0x9c, # DEGREE SIGN - 0x00b2: 0x9d, # SUPERSCRIPT TWO - 0x00b7: 0x9e, # MIDDLE DOT - 0x00f7: 0x9f, # DIVISION SIGN - 0x0401: 0xb3, # CYRILLIC CAPITAL LETTER IO - 0x0404: 0xb4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0406: 0xb6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xb7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - 0x0410: 0xe1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xe2, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xf7, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xe7, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xe4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xe5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xf6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xfa, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xe9, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xea, # CYRILLIC CAPITAL LETTER SHORT I 
- 0x041a: 0xeb, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0xec, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0xed, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0xee, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0xef, # CYRILLIC CAPITAL LETTER O - 0x041f: 0xf0, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xf2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xf3, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xf4, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xf5, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xe6, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xe8, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xe3, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xfe, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xfb, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xfd, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0xff, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0xf9, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0xf8, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0xfc, # CYRILLIC CAPITAL LETTER E - 0x042e: 0xe0, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0xf1, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xc1, # CYRILLIC SMALL LETTER A - 0x0431: 0xc2, # CYRILLIC SMALL LETTER BE - 0x0432: 0xd7, # CYRILLIC SMALL LETTER VE - 0x0433: 0xc7, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xc4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xc5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xd6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xda, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xc9, # CYRILLIC SMALL LETTER I - 0x0439: 0xca, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0xcb, # CYRILLIC SMALL LETTER KA - 0x043b: 0xcc, # CYRILLIC SMALL LETTER EL - 0x043c: 0xcd, # CYRILLIC SMALL LETTER EM - 0x043d: 0xce, # CYRILLIC SMALL LETTER EN - 0x043e: 0xcf, # CYRILLIC SMALL LETTER O - 0x043f: 0xd0, # CYRILLIC SMALL LETTER PE - 0x0440: 0xd2, # CYRILLIC SMALL LETTER ER - 0x0441: 0xd3, # CYRILLIC SMALL LETTER ES - 0x0442: 0xd4, # CYRILLIC SMALL LETTER TE - 0x0443: 0xd5, # CYRILLIC SMALL LETTER U - 0x0444: 0xc6, # CYRILLIC SMALL LETTER EF - 0x0445: 0xc8, # CYRILLIC SMALL LETTER HA - 0x0446: 0xc3, # CYRILLIC SMALL LETTER 
TSE - 0x0447: 0xde, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xdb, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xdd, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0xdf, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0xd9, # CYRILLIC SMALL LETTER YERU - 0x044c: 0xd8, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0xdc, # CYRILLIC SMALL LETTER E - 0x044e: 0xc0, # CYRILLIC SMALL LETTER YU - 0x044f: 0xd1, # CYRILLIC SMALL LETTER YA - 0x0451: 0xa3, # CYRILLIC SMALL LETTER IO - 0x0454: 0xa4, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0456: 0xa6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xa7, # CYRILLIC SMALL LETTER YI (UKRAINIAN) - 0x0490: 0xbd, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN - 0x0491: 0xad, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE + 0x00A0: 0x9A, # NO-BREAK SPACE + 0x00A9: 0xBF, # COPYRIGHT SIGN + 0x00B0: 0x9C, # DEGREE SIGN + 0x00B2: 0x9D, # SUPERSCRIPT TWO + 0x00B7: 0x9E, # MIDDLE DOT + 0x00F7: 0x9F, # DIVISION SIGN + 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO + 0x0404: 0xB4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0406: 0xB6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xB7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) + 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I + 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I + 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0xEE, # CYRILLIC CAPITAL 
LETTER EN + 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O + 0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U + 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA + 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E + 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xC1, # CYRILLIC SMALL LETTER A + 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE + 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE + 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xC9, # CYRILLIC SMALL LETTER I + 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA + 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL + 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM + 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN + 0x043E: 0xCF, # CYRILLIC SMALL LETTER O + 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE + 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER + 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES + 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE + 0x0443: 0xD5, # CYRILLIC SMALL LETTER U + 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF + 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA + 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xDF, # CYRILLIC SMALL 
LETTER HARD SIGN + 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xDC, # CYRILLIC SMALL LETTER E + 0x044E: 0xC0, # CYRILLIC SMALL LETTER YU + 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA + 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO + 0x0454: 0xA4, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0456: 0xA6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xA7, # CYRILLIC SMALL LETTER YI (UKRAINIAN) + 0x0490: 0xBD, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN + 0x0491: 0xAD, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN 0x2219: 0x95, # BULLET OPERATOR - 0x221a: 0x96, # SQUARE ROOT + 0x221A: 0x96, # SQUARE ROOT 0x2248: 0x97, # ALMOST EQUAL TO 0x2264: 0x98, # LESS-THAN OR EQUAL TO 0x2265: 0x99, # GREATER-THAN OR EQUAL TO 0x2320: 0x93, # TOP HALF INTEGRAL - 0x2321: 0x9b, # BOTTOM HALF INTEGRAL + 0x2321: 0x9B, # BOTTOM HALF INTEGRAL 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x8a, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xa0, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xa1, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0xa2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2554: 0xa5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0xa8, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0xa9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0xaa, # BOX DRAWINGS UP DOUBLE AND 
RIGHT SINGLE - 0x255a: 0xab, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0xac, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255d: 0xae, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0xaf, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0xb0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0xb1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0xb2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2563: 0xb5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0xb8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0xb9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0xba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0xbb, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0xbc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256c: 0xbe, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x8b, # UPPER HALF BLOCK - 0x2584: 0x8c, # LOWER HALF BLOCK - 0x2588: 0x8d, # FULL BLOCK - 0x258c: 0x8e, # LEFT HALF BLOCK - 0x2590: 0x8f, # RIGHT HALF BLOCK + 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0xB9, # BOX 
DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x8B, # UPPER HALF BLOCK + 0x2584: 0x8C, # LOWER HALF BLOCK + 0x2588: 0x8D, # FULL BLOCK + 0x258C: 0x8E, # LEFT HALF BLOCK + 0x2590: 0x8F, # RIGHT HALF BLOCK 0x2591: 0x90, # LIGHT SHADE 0x2592: 0x91, # MEDIUM SHADE 0x2593: 0x92, # DARK SHADE - 0x25a0: 0x94, # BLACK SQUARE -} \ No newline at end of file + 0x25A0: 0x94, # BLACK SQUARE +} + Index: mac_centeuro.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_centeuro.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- mac_centeuro.py 24 Oct 2005 12:07:48 -0000 1.2 +++ mac_centeuro.py 24 Oct 2005 12:14:59 -0000 1.3 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> 
CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON u'\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON @@ -170,12 +170,12 @@ u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK u'\u010c' # 0x89 -> LATIN CAPITAL LETTER C WITH CARON - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010d' # 0x8b -> LATIN SMALL LETTER C WITH CARON - u'\u0106' # 0x8c -> LATIN CAPITAL LETTER C WITH ACUTE - u'\u0107' # 0x8d -> LATIN SMALL LETTER C WITH ACUTE - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\u0179' # 0x8f -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON + u'\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE + u'\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE u'\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE u'\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE @@ -186,108 +186,108 @@ u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\u0117' # 0x98 -> 
LATIN SMALL LETTER E WITH DOT ABOVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9b -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9c -> LATIN SMALL LETTER U WITH ACUTE - u'\u011a' # 0x9d -> LATIN CAPITAL LETTER E WITH CARON - u'\u011b' # 0x9e -> LATIN SMALL LETTER E WITH CARON - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xa0 -> DAGGER - u'\xb0' # 0xa1 -> DEGREE SIGN - u'\u0118' # 0xa2 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa7' # 0xa4 -> SECTION SIGN - u'\u2022' # 0xa5 -> BULLET - u'\xb6' # 0xa6 -> PILCROW SIGN - u'\xdf' # 0xa7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u2122' # 0xaa -> TRADE MARK SIGN - u'\u0119' # 0xab -> LATIN SMALL LETTER E WITH OGONEK - u'\xa8' # 0xac -> DIAERESIS - u'\u2260' # 0xad -> NOT EQUAL TO - u'\u0123' # 0xae -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012e' # 0xaf -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u012f' # 0xb0 -> LATIN SMALL LETTER I WITH OGONEK - u'\u012a' # 0xb1 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\u012b' # 0xb4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0136' # 0xb5 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u2202' # 0xb6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xb7 -> N-ARY SUMMATION - u'\u0142' # 0xb8 -> LATIN SMALL LETTER L WITH STROKE - u'\u013b' # 0xb9 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u013c' # 0xba -> LATIN SMALL LETTER L WITH CEDILLA - u'\u013d' # 0xbb -> LATIN CAPITAL LETTER L WITH CARON - u'\u013e' # 0xbc -> LATIN SMALL LETTER L WITH CARON - u'\u0139' # 0xbd -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u013a' # 0xbe -> LATIN SMALL LETTER L WITH ACUTE - u'\u0145' # 0xbf -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u0146' # 0xc0 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u0143' # 0xc1 -> LATIN CAPITAL LETTER N WITH 
ACUTE - u'\xac' # 0xc2 -> NOT SIGN - u'\u221a' # 0xc3 -> SQUARE ROOT - u'\u0144' # 0xc4 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0147' # 0xc5 -> LATIN CAPITAL LETTER N WITH CARON - u'\u2206' # 0xc6 -> INCREMENT - u'\xab' # 0xc7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xc8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\u0148' # 0xcb -> LATIN SMALL LETTER N WITH CARON - u'\u0150' # 0xcc -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd5' # 0xcd -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0151' # 0xce -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\u014c' # 0xcf -> LATIN CAPITAL LETTER O WITH MACRON - u'\u2013' # 0xd0 -> EN DASH - u'\u2014' # 0xd1 -> EM DASH - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u25ca' # 0xd7 -> LOZENGE - u'\u014d' # 0xd8 -> LATIN SMALL LETTER O WITH MACRON - u'\u0154' # 0xd9 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\u0155' # 0xda -> LATIN SMALL LETTER R WITH ACUTE - u'\u0158' # 0xdb -> LATIN CAPITAL LETTER R WITH CARON - u'\u2039' # 0xdc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xdd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0159' # 0xde -> LATIN SMALL LETTER R WITH CARON - u'\u0156' # 0xdf -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\u0157' # 0xe0 -> LATIN SMALL LETTER R WITH CEDILLA - u'\u0160' # 0xe1 -> LATIN CAPITAL LETTER S WITH CARON - u'\u201a' # 0xe2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xe3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0161' # 0xe4 -> LATIN SMALL LETTER S WITH CARON - u'\u015a' # 0xe5 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0xe6 -> LATIN SMALL LETTER S WITH ACUTE - u'\xc1' # 0xe7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u0164' # 0xe8 -> LATIN CAPITAL LETTER T WITH CARON - u'\u0165' # 0xe9 -> LATIN 
SMALL LETTER T WITH CARON - u'\xcd' # 0xea -> LATIN CAPITAL LETTER I WITH ACUTE - u'\u017d' # 0xeb -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017e' # 0xec -> LATIN SMALL LETTER Z WITH CARON - u'\u016a' # 0xed -> LATIN CAPITAL LETTER U WITH MACRON - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u016b' # 0xf0 -> LATIN SMALL LETTER U WITH MACRON - u'\u016e' # 0xf1 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xf2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u016f' # 0xf3 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\u0170' # 0xf4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\u0171' # 0xf5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\u0172' # 0xf6 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0173' # 0xf7 -> LATIN SMALL LETTER U WITH OGONEK - u'\xdd' # 0xf8 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xfd' # 0xf9 -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0137' # 0xfa -> LATIN SMALL LETTER K WITH CEDILLA - u'\u017b' # 0xfb -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u0141' # 0xfc -> LATIN CAPITAL LETTER L WITH STROKE - u'\u017c' # 0xfd -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0122' # 0xfe -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u02c7' # 0xff -> CARON + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON + u'\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\u0119' # 0xAB -> 
LATIN SMALL LETTER E WITH OGONEK + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK + u'\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE + u'\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA + u'\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON + u'\u013e' # 0xBC -> LATIN SMALL LETTER L WITH CARON + u'\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE + u'\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON + u'\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT 
DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON + u'\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE + u'\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON + u'\u0156' # 0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA + u'\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0161' # 0xE4 -> LATIN SMALL LETTER S WITH CARON + u'\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON + u'\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON + u'\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON + u'\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK + u'\xdd' # 
0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA + u'\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0122' # 0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, 
# SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT 
SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a3: 0xa3, # POUND SIGN - 0x00a7: 0xa4, # SECTION SIGN - 0x00a8: 0xac, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00b0: 0xa1, # DEGREE SIGN - 0x00b6: 0xa6, # PILCROW SIGN - 0x00bb: 0xc8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c1: 0xe7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cd: 0xea, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xef, # LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX - 0x00d5: 0xcd, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00da: 0xf2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xf8, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00df: 0xa7, # LATIN SMALL LETTER SHARP S - 0x00e1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e9: 0x8e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ed: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00f3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x9b, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xd6, # DIVISION SIGN - 0x00fa: 0x9c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xf9, # LATIN SMALL LETTER Y WITH ACUTE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00DA: 0xF2, # LATIN 
CAPITAL LETTER U WITH ACUTE + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xF8, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xF9, # LATIN SMALL LETTER Y WITH ACUTE 0x0100: 0x81, # LATIN CAPITAL LETTER A WITH MACRON 0x0101: 0x82, # LATIN SMALL LETTER A WITH MACRON 0x0104: 0x84, # LATIN CAPITAL LETTER A WITH OGONEK 0x0105: 0x88, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x8c, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x8d, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0x89, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0x8b, # LATIN SMALL LETTER C WITH CARON - 0x010e: 0x91, # LATIN CAPITAL LETTER D WITH CARON - 0x010f: 0x93, # LATIN SMALL LETTER D WITH CARON + 0x0106: 0x8C, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x8D, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0x89, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0x8B, # LATIN SMALL LETTER C WITH CARON + 0x010E: 0x91, # LATIN CAPITAL LETTER D WITH CARON + 0x010F: 0x93, # LATIN SMALL LETTER D WITH CARON 0x0112: 0x94, # LATIN CAPITAL LETTER E WITH MACRON 0x0113: 0x95, # LATIN SMALL LETTER E WITH MACRON 0x0116: 0x96, # LATIN CAPITAL LETTER E WITH DOT ABOVE 0x0117: 0x98, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xa2, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xab, # LATIN SMALL LETTER E WITH OGONEK - 0x011a: 0x9d, # LATIN CAPITAL LETTER E WITH CARON - 0x011b: 0x9e, # LATIN SMALL LETTER E WITH 
CARON - 0x0122: 0xfe, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xae, # LATIN SMALL LETTER G WITH CEDILLA - 0x012a: 0xb1, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0xb4, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0xaf, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0xb0, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xb5, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xfa, # LATIN SMALL LETTER K WITH CEDILLA - 0x0139: 0xbd, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013a: 0xbe, # LATIN SMALL LETTER L WITH ACUTE - 0x013b: 0xb9, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0xba, # LATIN SMALL LETTER L WITH CEDILLA - 0x013d: 0xbb, # LATIN CAPITAL LETTER L WITH CARON - 0x013e: 0xbc, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xfc, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xb8, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xc1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xc4, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xbf, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xc0, # LATIN SMALL LETTER N WITH CEDILLA - 0x0147: 0xc5, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xcb, # LATIN SMALL LETTER N WITH CARON - 0x014c: 0xcf, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0xd8, # LATIN SMALL LETTER O WITH MACRON - 0x0150: 0xcc, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xce, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xd9, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xda, # LATIN SMALL LETTER R WITH ACUTE - 0x0156: 0xdf, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xe0, # LATIN SMALL LETTER R WITH CEDILLA - 0x0158: 0xdb, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xde, # LATIN SMALL LETTER R WITH CARON - 0x015a: 0xe5, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0xe6, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xe1, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xe4, # LATIN SMALL LETTER S WITH CARON - 0x0164: 0xe8, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0xe9, # LATIN SMALL LETTER T WITH CARON - 0x016a: 
0xed, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0xf0, # LATIN SMALL LETTER U WITH MACRON - 0x016e: 0xf1, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016f: 0xf3, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xf4, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xf5, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0172: 0xf6, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xf7, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0x8f, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x90, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0xfb, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0xfd, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0xeb, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xec, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0xff, # CARON - 0x2013: 0xd0, # EN DASH - 0x2014: 0xd1, # EM DASH - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0xe2, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xe3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xa0, # DAGGER - 0x2022: 0xa5, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS - 0x2039: 0xdc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0xdd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2122: 0xaa, # TRADE MARK SIGN - 0x2202: 0xb6, # PARTIAL DIFFERENTIAL - 0x2206: 0xc6, # INCREMENT - 0x2211: 0xb7, # N-ARY SUMMATION - 0x221a: 0xc3, # SQUARE ROOT - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO - 0x25ca: 0xd7, # LOZENGE -} \ No newline at end of file + 0x0118: 0xA2, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0xAB, # LATIN SMALL LETTER E WITH OGONEK + 0x011A: 0x9D, # LATIN CAPITAL LETTER E WITH CARON + 0x011B: 0x9E, # LATIN SMALL LETTER E WITH CARON + 0x0122: 0xFE, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0xAE, # LATIN SMALL LETTER G WITH CEDILLA + 0x012A: 0xB1, # LATIN CAPITAL LETTER I WITH MACRON 
+ 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON + 0x012E: 0xAF, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012F: 0xB0, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0xB5, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0xFA, # LATIN SMALL LETTER K WITH CEDILLA + 0x0139: 0xBD, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013A: 0xBE, # LATIN SMALL LETTER L WITH ACUTE + 0x013B: 0xB9, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013C: 0xBA, # LATIN SMALL LETTER L WITH CEDILLA + 0x013D: 0xBB, # LATIN CAPITAL LETTER L WITH CARON + 0x013E: 0xBC, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0xFC, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0xB8, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0xC1, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0xC4, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0xBF, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0xC0, # LATIN SMALL LETTER N WITH CEDILLA + 0x0147: 0xC5, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0xCB, # LATIN SMALL LETTER N WITH CARON + 0x014C: 0xCF, # LATIN CAPITAL LETTER O WITH MACRON + 0x014D: 0xD8, # LATIN SMALL LETTER O WITH MACRON + 0x0150: 0xCC, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0xCE, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0xD9, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0xDA, # LATIN SMALL LETTER R WITH ACUTE + 0x0156: 0xDF, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0xE0, # LATIN SMALL LETTER R WITH CEDILLA + 0x0158: 0xDB, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0xDE, # LATIN SMALL LETTER R WITH CARON + 0x015A: 0xE5, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015B: 0xE6, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0xE1, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xE4, # LATIN SMALL LETTER S WITH CARON + 0x0164: 0xE8, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0xE9, # LATIN SMALL LETTER T WITH CARON + 0x016A: 0xED, # LATIN CAPITAL LETTER U WITH MACRON + 0x016B: 0xF0, # LATIN SMALL LETTER U WITH MACRON + 0x016E: 0xF1, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016F: 0xF3, # 
LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0xF4, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0xF5, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0172: 0xF6, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0xF7, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017A: 0x90, # LATIN SMALL LETTER Z WITH ACUTE + 0x017B: 0xFB, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017D: 0xEB, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xEC, # LATIN SMALL LETTER Z WITH CARON + 0x02C7: 0xFF, # CARON + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE +} + Index: mac_croatian.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_croatian.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- mac_croatian.py 24 Oct 2005 12:07:48 -0000 1.2 +++ mac_croatian.py 24 Oct 2005 12:14:59 -0000 1.3 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> 
CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' 
# 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN 
SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA @@ -170,12 +170,12 @@ u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8b -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8c -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8f -> LATIN SMALL LETTER E WITH GRAVE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL 
LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE @@ -186,108 +186,108 @@ u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9b -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xa0 -> DAGGER - u'\xb0' # 0xa1 -> DEGREE SIGN - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa7' # 0xa4 -> SECTION SIGN - u'\u2022' # 0xa5 -> BULLET - u'\xb6' # 0xa6 -> PILCROW SIGN - u'\xdf' # 0xa7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\u0160' # 0xa9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u2122' # 0xaa -> TRADE MARK SIGN - u'\xb4' # 0xab -> ACUTE ACCENT - u'\xa8' # 0xac -> DIAERESIS - u'\u2260' # 0xad -> NOT EQUAL TO - u'\u017d' # 0xae -> LATIN CAPITAL LETTER Z WITH CARON - u'\xd8' # 0xaf -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xb0 -> INFINITY - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\u2206' # 0xb4 -> INCREMENT - u'\xb5' # 0xb5 -> MICRO SIGN - u'\u2202' # 0xb6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xb7 -> N-ARY SUMMATION - u'\u220f' # 0xb8 -> N-ARY PRODUCT - u'\u0161' # 0xb9 -> LATIN SMALL LETTER S WITH CARON - u'\u222b' # 0xba -> INTEGRAL - u'\xaa' # 0xbb -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xbc -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xbd -> GREEK CAPITAL LETTER OMEGA - u'\u017e' # 0xbe -> LATIN SMALL LETTER Z WITH CARON - u'\xf8' # 0xbf -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xc0 -> INVERTED QUESTION MARK - u'\xa1' # 0xc1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xc2 -> NOT SIGN - u'\u221a' # 0xc3 -> 
SQUARE ROOT - u'\u0192' # 0xc4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xc5 -> ALMOST EQUAL TO - u'\u0106' # 0xc6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xab' # 0xc7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u010c' # 0xc8 -> LATIN CAPITAL LETTER C WITH CARON - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\xc0' # 0xcb -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xcc -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xcd -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xce -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xcf -> LATIN SMALL LIGATURE OE - u'\u0110' # 0xd0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u2014' # 0xd1 -> EM DASH - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u25ca' # 0xd7 -> LOZENGE - u'\uf8ff' # 0xd8 -> Apple logo - u'\xa9' # 0xd9 -> COPYRIGHT SIGN - u'\u2044' # 0xda -> FRACTION SLASH - u'\u20ac' # 0xdb -> EURO SIGN - u'\u2039' # 0xdc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xdd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\xc6' # 0xde -> LATIN CAPITAL LETTER AE - u'\xbb' # 0xdf -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2013' # 0xe0 -> EN DASH - u'\xb7' # 0xe1 -> MIDDLE DOT - u'\u201a' # 0xe2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xe3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xe4 -> PER MILLE SIGN - u'\xc2' # 0xe5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0107' # 0xe6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xc1' # 0xe7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u010d' # 0xe8 -> LATIN SMALL LETTER C WITH CARON - u'\xc8' # 0xe9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xea -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xeb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xec -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xed -> 
LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0111' # 0xf0 -> LATIN SMALL LETTER D WITH STROKE - u'\xd2' # 0xf1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xf2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xf3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xf4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xf5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xf6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xf7 -> SMALL TILDE - u'\xaf' # 0xf8 -> MACRON - u'\u03c0' # 0xf9 -> GREEK SMALL LETTER PI - u'\xcb' # 0xfa -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u02da' # 0xfb -> RING ABOVE - u'\xb8' # 0xfc -> CEDILLA - u'\xca' # 0xfd -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xe6' # 0xfe -> LATIN SMALL LETTER AE - u'\u02c7' # 0xff -> CARON + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u2206' # 0xB4 -> 
INCREMENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\uf8ff' # 0xD8 -> Apple logo + u'\xa9' # 0xD9 -> COPYRIGHT SIGN + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\xc6' # 0xDE -> LATIN CAPITAL LETTER AE + u'\xbb' # 0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2013' # 0xE0 -> EN DASH + 
u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u03c0' # 0xF9 -> GREEK SMALL LETTER PI + u'\xcb' # 0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\xca' # 0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xe6' # 0xFE -> LATIN SMALL LETTER AE + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # 
CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 
0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 
0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a1: 0xc1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a7: 0xa4, # SECTION SIGN - 0x00a8: 0xac, # DIAERESIS - 0x00a9: 0xd9, # COPYRIGHT SIGN - 0x00aa: 0xbb, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00af: 0xf8, # MACRON - 0x00b0: 0xa1, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b4: 0xab, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xa6, # PILCROW SIGN - 0x00b7: 0xe1, # MIDDLE DOT - 0x00b8: 0xfc, # CEDILLA - 0x00ba: 0xbc, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xdf, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bf: 0xc0, # INVERTED QUESTION MARK - 0x00c0: 0xcb, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xe7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xe5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xcc, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xde, # LATIN CAPITAL LETTER AE - 0x00c7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xe9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xfd, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xfa, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xed, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xea, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xeb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 
0xec, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xf1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xcd, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0xaf, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xf4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xf2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xf3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xa7, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x8b, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x8c, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xfe, # LATIN SMALL LETTER AE - 0x00e7: 0x8d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x8f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x8e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x9b, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xd6, # DIVISION SIGN - 0x00f8: 0xbf, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x9d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x9c, # LATIN SMALL LETTER U WITH ACUTE - 
0x00fb: 0x9e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0106: 0xc6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xe6, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0xc8, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0xe8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xd0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xf0, # LATIN SMALL LETTER D WITH STROKE - 0x0131: 0xf5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xce, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xcf, # LATIN SMALL LIGATURE OE - 0x0160: 0xa9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xb9, # LATIN SMALL LETTER S WITH CARON - 0x017d: 0xae, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0xbe, # LATIN SMALL LETTER Z WITH CARON - 0x0192: 0xc4, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0xf6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02c7: 0xff, # CARON - 0x02da: 0xfb, # RING ABOVE - 0x02dc: 0xf7, # SMALL TILDE - 0x03a9: 0xbd, # GREEK CAPITAL LETTER OMEGA - 0x03c0: 0xf9, # GREEK SMALL LETTER PI - 0x2013: 0xe0, # EN DASH - 0x2014: 0xd1, # EM DASH - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0xe2, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xe3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xa0, # DAGGER - 0x2022: 0xa5, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS - 0x2030: 0xe4, # PER MILLE SIGN - 0x2039: 0xdc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0xdd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xda, # FRACTION SLASH - 0x20ac: 0xdb, # EURO SIGN - 0x2122: 0xaa, # TRADE MARK SIGN - 0x2202: 0xb6, # PARTIAL DIFFERENTIAL - 0x2206: 0xb4, # INCREMENT - 0x220f: 0xb8, # N-ARY PRODUCT - 0x2211: 0xb7, # N-ARY SUMMATION - 0x221a: 0xc3, # SQUARE ROOT - 0x221e: 0xb0, # INFINITY - 0x222b: 0xba, # INTEGRAL - 0x2248: 0xc5, # ALMOST EQUAL TO - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR 
EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO - 0x25ca: 0xd7, # LOZENGE - 0xf8ff: 0xd8, # Apple logo -} \ No newline at end of file + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xD9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xDF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xDE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xFD, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xFA, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN 
CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xFE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, 
# LATIN SMALL LETTER U WITH DIAERESIS + 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE + 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON + 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON + 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON + 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON + 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02DA: 0xFB, # RING ABOVE + 0x02DC: 0xF7, # SMALL TILDE + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xF9, # GREEK SMALL LETTER PI + 0x2013: 0xE0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xB4, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # 
LOZENGE + 0xF8FF: 0xD8, # Apple logo +} + Index: mac_cyrillic.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_cyrillic.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- mac_cyrillic.py 24 Oct 2005 12:07:48 -0000 1.6 +++ mac_cyrillic.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\u0410' # 0x80 -> CYRILLIC CAPITAL LETTER A u'\u0411' # 0x81 -> CYRILLIC CAPITAL LETTER BE u'\u0412' # 0x82 -> CYRILLIC CAPITAL LETTER VE @@ -170,12 +170,12 @@ u'\u0417' # 0x87 -> CYRILLIC CAPITAL LETTER ZE u'\u0418' # 0x88 -> CYRILLIC CAPITAL LETTER I u'\u0419' # 0x89 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0x8a -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0x8b -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0x8c -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0x8d -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0x8e -> CYRILLIC CAPITAL 
LETTER O - u'\u041f' # 0x8f -> CYRILLIC CAPITAL LETTER PE + u'\u041a' # 0x8A -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x8B -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x8C -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x8D -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x8E -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x8F -> CYRILLIC CAPITAL LETTER PE u'\u0420' # 0x90 -> CYRILLIC CAPITAL LETTER ER u'\u0421' # 0x91 -> CYRILLIC CAPITAL LETTER ES u'\u0422' # 0x92 -> CYRILLIC CAPITAL LETTER TE @@ -186,108 +186,108 @@ u'\u0427' # 0x97 -> CYRILLIC CAPITAL LETTER CHE u'\u0428' # 0x98 -> CYRILLIC CAPITAL LETTER SHA u'\u0429' # 0x99 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0x9a -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0x9b -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0x9c -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0x9d -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0x9e -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0x9f -> CYRILLIC CAPITAL LETTER YA - u'\u2020' # 0xa0 -> DAGGER - u'\xb0' # 0xa1 -> DEGREE SIGN - u'\u0490' # 0xa2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa7' # 0xa4 -> SECTION SIGN - u'\u2022' # 0xa5 -> BULLET - u'\xb6' # 0xa6 -> PILCROW SIGN - u'\u0406' # 0xa7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u2122' # 0xaa -> TRADE MARK SIGN - u'\u0402' # 0xab -> CYRILLIC CAPITAL LETTER DJE - u'\u0452' # 0xac -> CYRILLIC SMALL LETTER DJE - u'\u2260' # 0xad -> NOT EQUAL TO - u'\u0403' # 0xae -> CYRILLIC CAPITAL LETTER GJE - u'\u0453' # 0xaf -> CYRILLIC SMALL LETTER GJE - u'\u221e' # 0xb0 -> INFINITY - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\u0456' # 0xb4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xb5' # 0xb5 -> MICRO SIGN - u'\u0491' # 0xb6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\u0408' # 0xb7 -> CYRILLIC CAPITAL LETTER JE - 
u'\u0404' # 0xb8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0454' # 0xb9 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0407' # 0xba -> CYRILLIC CAPITAL LETTER YI - u'\u0457' # 0xbb -> CYRILLIC SMALL LETTER YI - u'\u0409' # 0xbc -> CYRILLIC CAPITAL LETTER LJE - u'\u0459' # 0xbd -> CYRILLIC SMALL LETTER LJE - u'\u040a' # 0xbe -> CYRILLIC CAPITAL LETTER NJE - u'\u045a' # 0xbf -> CYRILLIC SMALL LETTER NJE - u'\u0458' # 0xc0 -> CYRILLIC SMALL LETTER JE - u'\u0405' # 0xc1 -> CYRILLIC CAPITAL LETTER DZE - u'\xac' # 0xc2 -> NOT SIGN - u'\u221a' # 0xc3 -> SQUARE ROOT - u'\u0192' # 0xc4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xc5 -> ALMOST EQUAL TO - u'\u2206' # 0xc6 -> INCREMENT - u'\xab' # 0xc7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xc8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\u040b' # 0xcb -> CYRILLIC CAPITAL LETTER TSHE - u'\u045b' # 0xcc -> CYRILLIC SMALL LETTER TSHE - u'\u040c' # 0xcd -> CYRILLIC CAPITAL LETTER KJE - u'\u045c' # 0xce -> CYRILLIC SMALL LETTER KJE - u'\u0455' # 0xcf -> CYRILLIC SMALL LETTER DZE - u'\u2013' # 0xd0 -> EN DASH - u'\u2014' # 0xd1 -> EM DASH - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u201e' # 0xd7 -> DOUBLE LOW-9 QUOTATION MARK - u'\u040e' # 0xd8 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0xd9 -> CYRILLIC SMALL LETTER SHORT U - u'\u040f' # 0xda -> CYRILLIC CAPITAL LETTER DZHE - u'\u045f' # 0xdb -> CYRILLIC SMALL LETTER DZHE - u'\u2116' # 0xdc -> NUMERO SIGN - u'\u0401' # 0xdd -> CYRILLIC CAPITAL LETTER IO - u'\u0451' # 0xde -> CYRILLIC SMALL LETTER IO - u'\u044f' # 0xdf -> CYRILLIC SMALL LETTER YA - u'\u0430' # 0xe0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xe1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xe2 -> CYRILLIC SMALL LETTER 
VE - u'\u0433' # 0xe3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xe4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xe5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xe6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xe7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xe8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xe9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xea -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xeb -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xec -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xed -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xee -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xef -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xf0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xf1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xf2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xf3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xf4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xf5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xf6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xf7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xf8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xf9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xfa -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xfb -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xfc -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xfd -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xfe -> CYRILLIC SMALL LETTER YU - u'\u20ac' # 0xff -> EURO SIGN + u'\u042a' # 0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x9B -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x9D -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x9E -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x9F -> CYRILLIC CAPITAL LETTER YA + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\u0490' # 0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\u0406' # 0xA7 -> CYRILLIC CAPITAL LETTER 
BYELORUSSIAN-UKRAINIAN I + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\u0402' # 0xAB -> CYRILLIC CAPITAL LETTER DJE + u'\u0452' # 0xAC -> CYRILLIC SMALL LETTER DJE + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0403' # 0xAE -> CYRILLIC CAPITAL LETTER GJE + u'\u0453' # 0xAF -> CYRILLIC SMALL LETTER GJE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u0456' # 0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u0491' # 0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\u0408' # 0xB7 -> CYRILLIC CAPITAL LETTER JE + u'\u0404' # 0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0xBA -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0xBB -> CYRILLIC SMALL LETTER YI + u'\u0409' # 0xBC -> CYRILLIC CAPITAL LETTER LJE + u'\u0459' # 0xBD -> CYRILLIC SMALL LETTER LJE + u'\u040a' # 0xBE -> CYRILLIC CAPITAL LETTER NJE + u'\u045a' # 0xBF -> CYRILLIC SMALL LETTER NJE + u'\u0458' # 0xC0 -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0xC1 -> CYRILLIC CAPITAL LETTER DZE + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u040b' # 0xCB -> CYRILLIC CAPITAL LETTER TSHE + u'\u045b' # 0xCC -> CYRILLIC SMALL LETTER TSHE + u'\u040c' # 0xCD -> CYRILLIC CAPITAL LETTER KJE + u'\u045c' # 0xCE -> CYRILLIC SMALL LETTER KJE + u'\u0455' # 0xCF -> CYRILLIC SMALL LETTER DZE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 
-> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u201e' # 0xD7 -> DOUBLE LOW-9 QUOTATION MARK + u'\u040e' # 0xD8 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0xD9 -> CYRILLIC SMALL LETTER SHORT U + u'\u040f' # 0xDA -> CYRILLIC CAPITAL LETTER DZHE + u'\u045f' # 0xDB -> CYRILLIC SMALL LETTER DZHE + u'\u2116' # 0xDC -> NUMERO SIGN + u'\u0401' # 0xDD -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0xDE -> CYRILLIC SMALL LETTER IO + u'\u044f' # 0xDF -> CYRILLIC SMALL LETTER YA + u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 
0xFD -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU + u'\u20ac' # 0xFF -> EURO SIGN ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT 
SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN 
SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,40 +415,40 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a3: 0xa3, # POUND SIGN - 0x00a7: 0xa4, # SECTION SIGN - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00b0: 0xa1, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xa6, # PILCROW SIGN - 0x00bb: 0xc8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00f7: 0xd6, # DIVISION SIGN - 0x0192: 0xc4, # LATIN SMALL LETTER F WITH HOOK - 0x0401: 0xdd, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0xab, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0xae, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xb8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xc1, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xa7, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xba, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xb7, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0xbc, # CYRILLIC CAPITAL LETTER LJE - 0x040a: 0xbe, # CYRILLIC CAPITAL 
LETTER NJE - 0x040b: 0xcb, # CYRILLIC CAPITAL LETTER TSHE - 0x040c: 0xcd, # CYRILLIC CAPITAL LETTER KJE - 0x040e: 0xd8, # CYRILLIC CAPITAL LETTER SHORT U - 0x040f: 0xda, # CYRILLIC CAPITAL LETTER DZHE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A3: 0xA3, # POUND SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00F7: 0xD6, # DIVISION SIGN + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x0401: 0xDD, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0xAB, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0xAE, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0xB8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0xC1, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0xA7, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0xBA, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0xB7, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0xBC, # CYRILLIC CAPITAL LETTER LJE + 0x040A: 0xBE, # CYRILLIC CAPITAL LETTER NJE + 0x040B: 0xCB, # CYRILLIC CAPITAL LETTER TSHE + 0x040C: 0xCD, # CYRILLIC CAPITAL LETTER KJE + 0x040E: 0xD8, # CYRILLIC CAPITAL LETTER SHORT U + 0x040F: 0xDA, # CYRILLIC CAPITAL LETTER DZHE 0x0410: 0x80, # CYRILLIC CAPITAL LETTER A 0x0411: 0x81, # CYRILLIC CAPITAL LETTER BE 0x0412: 0x82, # CYRILLIC CAPITAL LETTER VE @@ -459,12 +459,12 @@ 0x0417: 0x87, # CYRILLIC CAPITAL LETTER ZE 0x0418: 0x88, # CYRILLIC CAPITAL LETTER I 0x0419: 0x89, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0x8a, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0x8b, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0x8c, # CYRILLIC 
CAPITAL LETTER EM - 0x041d: 0x8d, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0x8e, # CYRILLIC CAPITAL LETTER O - 0x041f: 0x8f, # CYRILLIC CAPITAL LETTER PE + 0x041A: 0x8A, # CYRILLIC CAPITAL LETTER KA + 0x041B: 0x8B, # CYRILLIC CAPITAL LETTER EL + 0x041C: 0x8C, # CYRILLIC CAPITAL LETTER EM + 0x041D: 0x8D, # CYRILLIC CAPITAL LETTER EN + 0x041E: 0x8E, # CYRILLIC CAPITAL LETTER O + 0x041F: 0x8F, # CYRILLIC CAPITAL LETTER PE 0x0420: 0x90, # CYRILLIC CAPITAL LETTER ER 0x0421: 0x91, # CYRILLIC CAPITAL LETTER ES 0x0422: 0x92, # CYRILLIC CAPITAL LETTER TE @@ -475,78 +475,79 @@ 0x0427: 0x97, # CYRILLIC CAPITAL LETTER CHE 0x0428: 0x98, # CYRILLIC CAPITAL LETTER SHA 0x0429: 0x99, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0x9a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0x9b, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0x9c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0x9d, # CYRILLIC CAPITAL LETTER E - 0x042e: 0x9e, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0x9f, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xe0, # CYRILLIC SMALL LETTER A - 0x0431: 0xe1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xe2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xe3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xe4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xe5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xe6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xe7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xe8, # CYRILLIC SMALL LETTER I - 0x0439: 0xe9, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0xea, # CYRILLIC SMALL LETTER KA - 0x043b: 0xeb, # CYRILLIC SMALL LETTER EL - 0x043c: 0xec, # CYRILLIC SMALL LETTER EM - 0x043d: 0xed, # CYRILLIC SMALL LETTER EN - 0x043e: 0xee, # CYRILLIC SMALL LETTER O - 0x043f: 0xef, # CYRILLIC SMALL LETTER PE - 0x0440: 0xf0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xf1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xf2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xf3, # CYRILLIC SMALL LETTER U - 0x0444: 0xf4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xf5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xf6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xf7, 
# CYRILLIC SMALL LETTER CHE - 0x0448: 0xf8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xf9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0xfa, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0xfb, # CYRILLIC SMALL LETTER YERU - 0x044c: 0xfc, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0xfd, # CYRILLIC SMALL LETTER E - 0x044e: 0xfe, # CYRILLIC SMALL LETTER YU - 0x044f: 0xdf, # CYRILLIC SMALL LETTER YA - 0x0451: 0xde, # CYRILLIC SMALL LETTER IO - 0x0452: 0xac, # CYRILLIC SMALL LETTER DJE - 0x0453: 0xaf, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xb9, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xcf, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xb4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xbb, # CYRILLIC SMALL LETTER YI - 0x0458: 0xc0, # CYRILLIC SMALL LETTER JE - 0x0459: 0xbd, # CYRILLIC SMALL LETTER LJE - 0x045a: 0xbf, # CYRILLIC SMALL LETTER NJE - 0x045b: 0xcc, # CYRILLIC SMALL LETTER TSHE - 0x045c: 0xce, # CYRILLIC SMALL LETTER KJE - 0x045e: 0xd9, # CYRILLIC SMALL LETTER SHORT U - 0x045f: 0xdb, # CYRILLIC SMALL LETTER DZHE - 0x0490: 0xa2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x0491: 0xb6, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x2013: 0xd0, # EN DASH - 0x2014: 0xd1, # EM DASH - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xd7, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xa0, # DAGGER - 0x2022: 0xa5, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS - 0x20ac: 0xff, # EURO SIGN - 0x2116: 0xdc, # NUMERO SIGN - 0x2122: 0xaa, # TRADE MARK SIGN - 0x2206: 0xc6, # INCREMENT - 0x221a: 0xc3, # SQUARE ROOT - 0x221e: 0xb0, # INFINITY - 0x2248: 0xc5, # ALMOST EQUAL TO - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO -} \ No newline at end of file + 0x042A: 0x9A, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B: 0x9B, # CYRILLIC CAPITAL LETTER YERU + 0x042C: 0x9C, # 
CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D: 0x9D, # CYRILLIC CAPITAL LETTER E + 0x042E: 0x9E, # CYRILLIC CAPITAL LETTER YU + 0x042F: 0x9F, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0xE0, # CYRILLIC SMALL LETTER A + 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE + 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE + 0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE + 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE + 0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0xE8, # CYRILLIC SMALL LETTER I + 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I + 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA + 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL + 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM + 0x043D: 0xED, # CYRILLIC SMALL LETTER EN + 0x043E: 0xEE, # CYRILLIC SMALL LETTER O + 0x043F: 0xEF, # CYRILLIC SMALL LETTER PE + 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER + 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES + 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE + 0x0443: 0xF3, # CYRILLIC SMALL LETTER U + 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF + 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA + 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA + 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN + 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU + 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044D: 0xFD, # CYRILLIC SMALL LETTER E + 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU + 0x044F: 0xDF, # CYRILLIC SMALL LETTER YA + 0x0451: 0xDE, # CYRILLIC SMALL LETTER IO + 0x0452: 0xAC, # CYRILLIC SMALL LETTER DJE + 0x0453: 0xAF, # CYRILLIC SMALL LETTER GJE + 0x0454: 0xB9, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0xCF, # CYRILLIC SMALL LETTER DZE + 0x0456: 0xB4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0xBB, # CYRILLIC SMALL LETTER YI + 0x0458: 0xC0, # CYRILLIC SMALL LETTER JE + 0x0459: 0xBD, # CYRILLIC SMALL LETTER LJE + 
0x045A: 0xBF, # CYRILLIC SMALL LETTER NJE + 0x045B: 0xCC, # CYRILLIC SMALL LETTER TSHE + 0x045C: 0xCE, # CYRILLIC SMALL LETTER KJE + 0x045E: 0xD9, # CYRILLIC SMALL LETTER SHORT U + 0x045F: 0xDB, # CYRILLIC SMALL LETTER DZHE + 0x0490: 0xA2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0xB6, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xD7, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x20AC: 0xFF, # EURO SIGN + 0x2116: 0xDC, # NUMERO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2206: 0xC6, # INCREMENT + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO +} + Index: mac_farsi.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_farsi.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- mac_farsi.py 24 Oct 2005 12:07:48 -0000 1.2 +++ mac_farsi.py 24 Oct 2005 12:14:59 -0000 1.3 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL 
CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE, left-right u'!' # 0x21 -> EXCLAMATION MARK, left-right u'"' # 0x22 -> QUOTATION MARK, left-right @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE, left-right u'(' # 0x28 -> LEFT PARENTHESIS, left-right u')' # 0x29 -> RIGHT PARENTHESIS, left-right - u'*' # 0x2a -> ASTERISK, left-right - u'+' # 0x2b -> PLUS SIGN, left-right - u',' # 0x2c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - u'-' # 0x2d -> HYPHEN-MINUS, left-right - u'.' # 0x2e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - u'/' # 0x2f -> SOLIDUS, left-right + u'*' # 0x2A -> ASTERISK, left-right + u'+' # 0x2B -> PLUS SIGN, left-right + u',' # 0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + u'-' # 0x2D -> HYPHEN-MINUS, left-right + u'.' 
# 0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + u'/' # 0x2F -> SOLIDUS, left-right u'0' # 0x30 -> DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO u'1' # 0x31 -> DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE u'2' # 0x32 -> DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE - u':' # 0x3a -> COLON, left-right - u';' # 0x3b -> SEMICOLON, left-right - u'<' # 0x3c -> LESS-THAN SIGN, left-right - u'=' # 0x3d -> EQUALS SIGN, left-right - u'>' # 0x3e -> GREATER-THAN SIGN, left-right - u'?' # 0x3f -> QUESTION MARK, left-right + u':' # 0x3A -> COLON, left-right + u';' # 0x3B -> SEMICOLON, left-right + u'<' # 0x3C -> LESS-THAN SIGN, left-right + u'=' # 0x3D -> EQUALS SIGN, left-right + u'>' # 0x3E -> GREATER-THAN SIGN, left-right + u'?' 
# 0x3F -> QUESTION MARK, left-right u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET, left-right - u'\\' # 0x5c -> REVERSE SOLIDUS, left-right - u']' # 0x5d -> RIGHT SQUARE BRACKET, left-right - u'^' # 0x5e -> CIRCUMFLEX ACCENT, left-right - u'_' # 0x5f -> LOW LINE, left-right + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET, left-right + u'\\' # 0x5C -> REVERSE SOLIDUS, left-right + u']' # 0x5D -> RIGHT SQUARE BRACKET, left-right + u'^' # 0x5E -> CIRCUMFLEX ACCENT, left-right + u'_' # 0x5F -> LOW LINE, left-right u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL 
LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET, left-right - u'|' # 0x7c -> VERTICAL LINE, left-right - u'}' # 0x7d -> RIGHT CURLY BRACKET, left-right - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET, left-right + u'|' # 0x7C -> VERTICAL LINE, left-right + u'}' # 0x7D -> RIGHT CURLY BRACKET, left-right + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xa0' # 0x81 -> NO-BREAK SPACE, right-left u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA @@ -170,12 +170,12 @@ u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u06ba' # 0x8b -> ARABIC LETTER NOON GHUNNA - u'\xab' # 0x8c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xe7' # 0x8d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8f -> LATIN SMALL LETTER E WITH GRAVE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u06ba' # 0x8B -> ARABIC LETTER NOON GHUNNA + u'\xab' # 0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E 
WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE @@ -186,108 +186,108 @@ u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xbb' # 0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0x9b -> DIVISION SIGN, right-left - u'\xfa' # 0x9c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u' ' # 0xa0 -> SPACE, right-left - u'!' # 0xa1 -> EXCLAMATION MARK, right-left - u'"' # 0xa2 -> QUOTATION MARK, right-left - u'#' # 0xa3 -> NUMBER SIGN, right-left - u'$' # 0xa4 -> DOLLAR SIGN, right-left - u'\u066a' # 0xa5 -> ARABIC PERCENT SIGN - u'&' # 0xa6 -> AMPERSAND, right-left - u"'" # 0xa7 -> APOSTROPHE, right-left - u'(' # 0xa8 -> LEFT PARENTHESIS, right-left - u')' # 0xa9 -> RIGHT PARENTHESIS, right-left - u'*' # 0xaa -> ASTERISK, right-left - u'+' # 0xab -> PLUS SIGN, right-left - u'\u060c' # 0xac -> ARABIC COMMA - u'-' # 0xad -> HYPHEN-MINUS, right-left - u'.' 
# 0xae -> FULL STOP, right-left - u'/' # 0xaf -> SOLIDUS, right-left - u'\u06f0' # 0xb0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) - u'\u06f1' # 0xb1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) - u'\u06f2' # 0xb2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) - u'\u06f3' # 0xb3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) - u'\u06f4' # 0xb4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) - u'\u06f5' # 0xb5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) - u'\u06f6' # 0xb6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) - u'\u06f7' # 0xb7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) - u'\u06f8' # 0xb8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) - u'\u06f9' # 0xb9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) - u':' # 0xba -> COLON, right-left - u'\u061b' # 0xbb -> ARABIC SEMICOLON - u'<' # 0xbc -> LESS-THAN SIGN, right-left - u'=' # 0xbd -> EQUALS SIGN, right-left - u'>' # 0xbe -> GREATER-THAN SIGN, right-left - u'\u061f' # 0xbf -> ARABIC QUESTION MARK - u'\u274a' # 0xc0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - u'\u0621' # 0xc1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xc2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xc3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xc4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xc5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xc6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xc7 -> ARABIC LETTER ALEF - u'\u0628' # 0xc8 -> ARABIC LETTER BEH - u'\u0629' # 0xc9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xca -> ARABIC LETTER TEH - u'\u062b' # 0xcb -> ARABIC LETTER THEH - u'\u062c' # 0xcc -> ARABIC LETTER JEEM - u'\u062d' # 0xcd -> ARABIC LETTER HAH - u'\u062e' # 0xce -> ARABIC LETTER KHAH - u'\u062f' # 0xcf -> ARABIC LETTER DAL - u'\u0630' # 0xd0 -> ARABIC LETTER THAL - u'\u0631' # 0xd1 -> ARABIC 
LETTER REH - u'\u0632' # 0xd2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xd3 -> ARABIC LETTER SEEN - u'\u0634' # 0xd4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xd5 -> ARABIC LETTER SAD - u'\u0636' # 0xd6 -> ARABIC LETTER DAD - u'\u0637' # 0xd7 -> ARABIC LETTER TAH - u'\u0638' # 0xd8 -> ARABIC LETTER ZAH - u'\u0639' # 0xd9 -> ARABIC LETTER AIN - u'\u063a' # 0xda -> ARABIC LETTER GHAIN - u'[' # 0xdb -> LEFT SQUARE BRACKET, right-left - u'\\' # 0xdc -> REVERSE SOLIDUS, right-left - u']' # 0xdd -> RIGHT SQUARE BRACKET, right-left - u'^' # 0xde -> CIRCUMFLEX ACCENT, right-left - u'_' # 0xdf -> LOW LINE, right-left - u'\u0640' # 0xe0 -> ARABIC TATWEEL - u'\u0641' # 0xe1 -> ARABIC LETTER FEH - u'\u0642' # 0xe2 -> ARABIC LETTER QAF - u'\u0643' # 0xe3 -> ARABIC LETTER KAF - u'\u0644' # 0xe4 -> ARABIC LETTER LAM - u'\u0645' # 0xe5 -> ARABIC LETTER MEEM - u'\u0646' # 0xe6 -> ARABIC LETTER NOON - u'\u0647' # 0xe7 -> ARABIC LETTER HEH - u'\u0648' # 0xe8 -> ARABIC LETTER WAW - u'\u0649' # 0xe9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xea -> ARABIC LETTER YEH - u'\u064b' # 0xeb -> ARABIC FATHATAN - u'\u064c' # 0xec -> ARABIC DAMMATAN - u'\u064d' # 0xed -> ARABIC KASRATAN - u'\u064e' # 0xee -> ARABIC FATHA - u'\u064f' # 0xef -> ARABIC DAMMA - u'\u0650' # 0xf0 -> ARABIC KASRA - u'\u0651' # 0xf1 -> ARABIC SHADDA - u'\u0652' # 0xf2 -> ARABIC SUKUN - u'\u067e' # 0xf3 -> ARABIC LETTER PEH - u'\u0679' # 0xf4 -> ARABIC LETTER TTEH - u'\u0686' # 0xf5 -> ARABIC LETTER TCHEH - u'\u06d5' # 0xf6 -> ARABIC LETTER AE - u'\u06a4' # 0xf7 -> ARABIC LETTER VEH - u'\u06af' # 0xf8 -> ARABIC LETTER GAF - u'\u0688' # 0xf9 -> ARABIC LETTER DDAL - u'\u0691' # 0xfa -> ARABIC LETTER RREH - u'{' # 0xfb -> LEFT CURLY BRACKET, right-left - u'|' # 0xfc -> VERTICAL LINE, right-left - u'}' # 0xfd -> RIGHT CURLY BRACKET, right-left - u'\u0698' # 0xfe -> ARABIC LETTER JEH - u'\u06d2' # 0xff -> ARABIC LETTER YEH BARREE + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x9B -> DIVISION SIGN, right-left + 
u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u' ' # 0xA0 -> SPACE, right-left + u'!' # 0xA1 -> EXCLAMATION MARK, right-left + u'"' # 0xA2 -> QUOTATION MARK, right-left + u'#' # 0xA3 -> NUMBER SIGN, right-left + u'$' # 0xA4 -> DOLLAR SIGN, right-left + u'\u066a' # 0xA5 -> ARABIC PERCENT SIGN + u'&' # 0xA6 -> AMPERSAND, right-left + u"'" # 0xA7 -> APOSTROPHE, right-left + u'(' # 0xA8 -> LEFT PARENTHESIS, right-left + u')' # 0xA9 -> RIGHT PARENTHESIS, right-left + u'*' # 0xAA -> ASTERISK, right-left + u'+' # 0xAB -> PLUS SIGN, right-left + u'\u060c' # 0xAC -> ARABIC COMMA + u'-' # 0xAD -> HYPHEN-MINUS, right-left + u'.' # 0xAE -> FULL STOP, right-left + u'/' # 0xAF -> SOLIDUS, right-left + u'\u06f0' # 0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) + u'\u06f1' # 0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) + u'\u06f2' # 0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) + u'\u06f3' # 0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) + u'\u06f4' # 0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) + u'\u06f5' # 0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) + u'\u06f6' # 0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) + u'\u06f7' # 0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) + u'\u06f8' # 0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) + u'\u06f9' # 0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) + u':' # 0xBA -> COLON, right-left + u'\u061b' # 0xBB -> ARABIC SEMICOLON + u'<' # 0xBC -> LESS-THAN SIGN, right-left + u'=' # 0xBD -> EQUALS SIGN, right-left + u'>' # 0xBE -> GREATER-THAN SIGN, right-left + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\u274a' # 0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER 
ASTERISK, right-left + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\u0637' # 0xD7 -> ARABIC LETTER TAH + u'\u0638' # 0xD8 -> ARABIC LETTER ZAH + u'\u0639' # 0xD9 -> ARABIC LETTER AIN + u'\u063a' # 0xDA -> ARABIC LETTER GHAIN + u'[' # 0xDB -> LEFT SQUARE BRACKET, right-left + u'\\' # 0xDC -> REVERSE SOLIDUS, right-left + u']' # 0xDD -> RIGHT SQUARE BRACKET, right-left + u'^' # 0xDE -> CIRCUMFLEX ACCENT, right-left + u'_' # 0xDF -> LOW LINE, right-left + u'\u0640' # 0xE0 -> ARABIC TATWEEL + u'\u0641' # 0xE1 -> ARABIC LETTER FEH + u'\u0642' # 0xE2 -> ARABIC LETTER QAF + u'\u0643' # 0xE3 -> ARABIC LETTER KAF + u'\u0644' # 0xE4 -> ARABIC LETTER LAM + u'\u0645' # 0xE5 -> ARABIC LETTER MEEM + u'\u0646' # 0xE6 -> ARABIC LETTER NOON + u'\u0647' # 0xE7 -> ARABIC LETTER HEH + u'\u0648' # 0xE8 -> ARABIC LETTER WAW + u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xEA -> ARABIC LETTER YEH + u'\u064b' # 0xEB -> ARABIC FATHATAN + u'\u064c' # 0xEC -> ARABIC DAMMATAN + u'\u064d' # 0xED -> ARABIC KASRATAN + u'\u064e' # 0xEE -> ARABIC FATHA + 
u'\u064f' # 0xEF -> ARABIC DAMMA + u'\u0650' # 0xF0 -> ARABIC KASRA + u'\u0651' # 0xF1 -> ARABIC SHADDA + u'\u0652' # 0xF2 -> ARABIC SUKUN + u'\u067e' # 0xF3 -> ARABIC LETTER PEH + u'\u0679' # 0xF4 -> ARABIC LETTER TTEH + u'\u0686' # 0xF5 -> ARABIC LETTER TCHEH + u'\u06d5' # 0xF6 -> ARABIC LETTER AE + u'\u06a4' # 0xF7 -> ARABIC LETTER VEH + u'\u06af' # 0xF8 -> ARABIC LETTER GAF + u'\u0688' # 0xF9 -> ARABIC LETTER DDAL + u'\u0691' # 0xFA -> ARABIC LETTER RREH + u'{' # 0xFB -> LEFT CURLY BRACKET, right-left + u'|' # 0xFC -> VERTICAL LINE, right-left + u'}' # 0xFD -> RIGHT CURLY BRACKET, right-left + u'\u0698' # 0xFE -> ARABIC LETTER JEH + u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,42 +319,42 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE, left-right - 0x0020: 0xa0, # SPACE, right-left + 
0x0020: 0xA0, # SPACE, right-left 0x0021: 0x21, # EXCLAMATION MARK, left-right - 0x0021: 0xa1, # EXCLAMATION MARK, right-left + 0x0021: 0xA1, # EXCLAMATION MARK, right-left 0x0022: 0x22, # QUOTATION MARK, left-right - 0x0022: 0xa2, # QUOTATION MARK, right-left + 0x0022: 0xA2, # QUOTATION MARK, right-left 0x0023: 0x23, # NUMBER SIGN, left-right - 0x0023: 0xa3, # NUMBER SIGN, right-left + 0x0023: 0xA3, # NUMBER SIGN, right-left 0x0024: 0x24, # DOLLAR SIGN, left-right - 0x0024: 0xa4, # DOLLAR SIGN, right-left + 0x0024: 0xA4, # DOLLAR SIGN, right-left 0x0025: 0x25, # PERCENT SIGN, left-right 0x0026: 0x26, # AMPERSAND, left-right - 0x0026: 0xa6, # AMPERSAND, right-left + 0x0026: 0xA6, # AMPERSAND, right-left 0x0027: 0x27, # APOSTROPHE, left-right - 0x0027: 0xa7, # APOSTROPHE, right-left + 0x0027: 0xA7, # APOSTROPHE, right-left 0x0028: 0x28, # LEFT PARENTHESIS, left-right - 0x0028: 0xa8, # LEFT PARENTHESIS, right-left + 0x0028: 0xA8, # LEFT PARENTHESIS, right-left 0x0029: 0x29, # RIGHT PARENTHESIS, left-right - 0x0029: 0xa9, # RIGHT PARENTHESIS, right-left - 0x002a: 0x2a, # ASTERISK, left-right - 0x002a: 0xaa, # ASTERISK, right-left - 0x002b: 0x2b, # PLUS SIGN, left-right - 0x002b: 0xab, # PLUS SIGN, right-left - 0x002c: 0x2c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - 0x002d: 0x2d, # HYPHEN-MINUS, left-right - 0x002d: 0xad, # HYPHEN-MINUS, right-left - 0x002e: 0x2e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - 0x002e: 0xae, # FULL STOP, right-left - 0x002f: 0x2f, # SOLIDUS, left-right - 0x002f: 0xaf, # SOLIDUS, right-left + 0x0029: 0xA9, # RIGHT PARENTHESIS, right-left + 0x002A: 0x2A, # ASTERISK, left-right + 0x002A: 0xAA, # ASTERISK, right-left + 0x002B: 0x2B, # PLUS SIGN, left-right + 0x002B: 0xAB, # PLUS SIGN, right-left + 0x002C: 0x2C, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + 0x002D: 0x2D, # HYPHEN-MINUS, 
left-right + 0x002D: 0xAD, # HYPHEN-MINUS, right-left + 0x002E: 0x2E, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + 0x002E: 0xAE, # FULL STOP, right-left + 0x002F: 0x2F, # SOLIDUS, left-right + 0x002F: 0xAF, # SOLIDUS, right-left 0x0030: 0x30, # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO 0x0031: 0x31, # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE 0x0032: 0x32, # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO @@ -365,16 +365,16 @@ 0x0037: 0x37, # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE - 0x003a: 0x3a, # COLON, left-right - 0x003a: 0xba, # COLON, right-left - 0x003b: 0x3b, # SEMICOLON, left-right - 0x003c: 0x3c, # LESS-THAN SIGN, left-right - 0x003c: 0xbc, # LESS-THAN SIGN, right-left - 0x003d: 0x3d, # EQUALS SIGN, left-right - 0x003d: 0xbd, # EQUALS SIGN, right-left - 0x003e: 0x3e, # GREATER-THAN SIGN, left-right - 0x003e: 0xbe, # GREATER-THAN SIGN, right-left - 0x003f: 0x3f, # QUESTION MARK, left-right + 0x003A: 0x3A, # COLON, left-right + 0x003A: 0xBA, # COLON, right-left + 0x003B: 0x3B, # SEMICOLON, left-right + 0x003C: 0x3C, # LESS-THAN SIGN, left-right + 0x003C: 0xBC, # LESS-THAN SIGN, right-left + 0x003D: 0x3D, # EQUALS SIGN, left-right + 0x003D: 0xBD, # EQUALS SIGN, right-left + 0x003E: 0x3E, # GREATER-THAN SIGN, left-right + 0x003E: 0xBE, # GREATER-THAN SIGN, right-left + 0x003F: 0x3F, # QUESTION MARK, left-right 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -385,12 +385,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL 
LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -401,17 +401,17 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET, left-right - 0x005b: 0xdb, # LEFT SQUARE BRACKET, right-left - 0x005c: 0x5c, # REVERSE SOLIDUS, left-right - 0x005c: 0xdc, # REVERSE SOLIDUS, right-left - 0x005d: 0x5d, # RIGHT SQUARE BRACKET, left-right - 0x005d: 0xdd, # RIGHT SQUARE BRACKET, right-left - 0x005e: 0x5e, # CIRCUMFLEX ACCENT, left-right - 0x005e: 0xde, # CIRCUMFLEX ACCENT, right-left - 0x005f: 0x5f, # LOW LINE, left-right - 0x005f: 0xdf, # LOW LINE, right-left + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET, left-right + 0x005B: 0xDB, # LEFT SQUARE BRACKET, right-left + 0x005C: 0x5C, # REVERSE SOLIDUS, left-right + 0x005C: 0xDC, # REVERSE SOLIDUS, right-left + 0x005D: 0x5D, # RIGHT SQUARE BRACKET, left-right + 0x005D: 0xDD, # RIGHT SQUARE BRACKET, right-left + 0x005E: 0x5E, # CIRCUMFLEX ACCENT, left-right + 0x005E: 0xDE, # CIRCUMFLEX ACCENT, right-left + 0x005F: 0x5F, # LOW LINE, left-right + 0x005F: 0xDF, # LOW LINE, right-left 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -422,12 +422,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN 
SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -438,115 +438,116 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET, left-right - 0x007b: 0xfb, # LEFT CURLY BRACKET, right-left - 0x007c: 0x7c, # VERTICAL LINE, left-right - 0x007c: 0xfc, # VERTICAL LINE, right-left - 0x007d: 0x7d, # RIGHT CURLY BRACKET, left-right - 0x007d: 0xfd, # RIGHT CURLY BRACKET, right-left - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0x81, # NO-BREAK SPACE, right-left - 0x00ab: 0x8c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00bb: 0x98, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00e0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0x8d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x8f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x8e, # 
LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x9b, # DIVISION SIGN, right-left - 0x00f9: 0x9d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x9c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x9e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x060c: 0xac, # ARABIC COMMA - 0x061b: 0xbb, # ARABIC SEMICOLON - 0x061f: 0xbf, # ARABIC QUESTION MARK - 0x0621: 0xc1, # ARABIC LETTER HAMZA - 0x0622: 0xc2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xc3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xc4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xc5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xc6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xc7, # ARABIC LETTER ALEF - 0x0628: 0xc8, # ARABIC LETTER BEH - 0x0629: 0xc9, # ARABIC LETTER TEH MARBUTA - 0x062a: 0xca, # ARABIC LETTER TEH - 0x062b: 0xcb, # ARABIC LETTER THEH - 0x062c: 0xcc, # ARABIC LETTER JEEM - 0x062d: 0xcd, # ARABIC LETTER HAH - 0x062e: 0xce, # ARABIC LETTER KHAH - 0x062f: 0xcf, # ARABIC LETTER DAL - 0x0630: 0xd0, # ARABIC LETTER THAL - 0x0631: 0xd1, # ARABIC LETTER REH - 0x0632: 0xd2, # ARABIC LETTER ZAIN - 0x0633: 0xd3, # ARABIC LETTER SEEN - 0x0634: 0xd4, # ARABIC LETTER SHEEN - 0x0635: 0xd5, # ARABIC LETTER SAD - 0x0636: 0xd6, # ARABIC LETTER DAD - 0x0637: 0xd7, # ARABIC LETTER TAH - 0x0638: 0xd8, # ARABIC LETTER ZAH - 0x0639: 0xd9, # ARABIC LETTER AIN - 0x063a: 0xda, # ARABIC LETTER GHAIN - 0x0640: 0xe0, # ARABIC TATWEEL - 0x0641: 0xe1, # ARABIC LETTER FEH - 0x0642: 0xe2, # 
ARABIC LETTER QAF - 0x0643: 0xe3, # ARABIC LETTER KAF - 0x0644: 0xe4, # ARABIC LETTER LAM - 0x0645: 0xe5, # ARABIC LETTER MEEM - 0x0646: 0xe6, # ARABIC LETTER NOON - 0x0647: 0xe7, # ARABIC LETTER HEH - 0x0648: 0xe8, # ARABIC LETTER WAW - 0x0649: 0xe9, # ARABIC LETTER ALEF MAKSURA - 0x064a: 0xea, # ARABIC LETTER YEH - 0x064b: 0xeb, # ARABIC FATHATAN - 0x064c: 0xec, # ARABIC DAMMATAN - 0x064d: 0xed, # ARABIC KASRATAN - 0x064e: 0xee, # ARABIC FATHA - 0x064f: 0xef, # ARABIC DAMMA - 0x0650: 0xf0, # ARABIC KASRA - 0x0651: 0xf1, # ARABIC SHADDA - 0x0652: 0xf2, # ARABIC SUKUN - 0x066a: 0xa5, # ARABIC PERCENT SIGN - 0x0679: 0xf4, # ARABIC LETTER TTEH - 0x067e: 0xf3, # ARABIC LETTER PEH - 0x0686: 0xf5, # ARABIC LETTER TCHEH - 0x0688: 0xf9, # ARABIC LETTER DDAL - 0x0691: 0xfa, # ARABIC LETTER RREH - 0x0698: 0xfe, # ARABIC LETTER JEH - 0x06a4: 0xf7, # ARABIC LETTER VEH - 0x06af: 0xf8, # ARABIC LETTER GAF - 0x06ba: 0x8b, # ARABIC LETTER NOON GHUNNA - 0x06d2: 0xff, # ARABIC LETTER YEH BARREE - 0x06d5: 0xf6, # ARABIC LETTER AE - 0x06f0: 0xb0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x06f1: 0xb1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x06f2: 0xb2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x06f3: 0xb3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x06f4: 0xb4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x06f5: 0xb5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x06f6: 0xb6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x06f7: 0xb7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x06f8: 0xb8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x06f9: 0xb9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET, left-right + 0x007B: 0xFB, # LEFT CURLY BRACKET, right-left + 0x007C: 0x7C, # VERTICAL LINE, left-right 
+ 0x007C: 0xFC, # VERTICAL LINE, right-left + 0x007D: 0x7D, # RIGHT CURLY BRACKET, left-right + 0x007D: 0xFD, # RIGHT CURLY BRACKET, right-left + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0x81, # NO-BREAK SPACE, right-left + 0x00AB: 0x8C, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00BB: 0x98, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0x9B, # DIVISION SIGN, right-left + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x060C: 0xAC, # ARABIC COMMA + 0x061B: 0xBB, # ARABIC SEMICOLON + 0x061F: 0xBF, # ARABIC QUESTION MARK + 0x0621: 0xC1, # ARABIC LETTER HAMZA + 0x0622: 0xC2, # ARABIC LETTER ALEF WITH 
MADDA ABOVE + 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0xC7, # ARABIC LETTER ALEF + 0x0628: 0xC8, # ARABIC LETTER BEH + 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA + 0x062A: 0xCA, # ARABIC LETTER TEH + 0x062B: 0xCB, # ARABIC LETTER THEH + 0x062C: 0xCC, # ARABIC LETTER JEEM + 0x062D: 0xCD, # ARABIC LETTER HAH + 0x062E: 0xCE, # ARABIC LETTER KHAH + 0x062F: 0xCF, # ARABIC LETTER DAL + 0x0630: 0xD0, # ARABIC LETTER THAL + 0x0631: 0xD1, # ARABIC LETTER REH + 0x0632: 0xD2, # ARABIC LETTER ZAIN + 0x0633: 0xD3, # ARABIC LETTER SEEN + 0x0634: 0xD4, # ARABIC LETTER SHEEN + 0x0635: 0xD5, # ARABIC LETTER SAD + 0x0636: 0xD6, # ARABIC LETTER DAD + 0x0637: 0xD7, # ARABIC LETTER TAH + 0x0638: 0xD8, # ARABIC LETTER ZAH + 0x0639: 0xD9, # ARABIC LETTER AIN + 0x063A: 0xDA, # ARABIC LETTER GHAIN + 0x0640: 0xE0, # ARABIC TATWEEL + 0x0641: 0xE1, # ARABIC LETTER FEH + 0x0642: 0xE2, # ARABIC LETTER QAF + 0x0643: 0xE3, # ARABIC LETTER KAF + 0x0644: 0xE4, # ARABIC LETTER LAM + 0x0645: 0xE5, # ARABIC LETTER MEEM + 0x0646: 0xE6, # ARABIC LETTER NOON + 0x0647: 0xE7, # ARABIC LETTER HEH + 0x0648: 0xE8, # ARABIC LETTER WAW + 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA + 0x064A: 0xEA, # ARABIC LETTER YEH + 0x064B: 0xEB, # ARABIC FATHATAN + 0x064C: 0xEC, # ARABIC DAMMATAN + 0x064D: 0xED, # ARABIC KASRATAN + 0x064E: 0xEE, # ARABIC FATHA + 0x064F: 0xEF, # ARABIC DAMMA + 0x0650: 0xF0, # ARABIC KASRA + 0x0651: 0xF1, # ARABIC SHADDA + 0x0652: 0xF2, # ARABIC SUKUN + 0x066A: 0xA5, # ARABIC PERCENT SIGN + 0x0679: 0xF4, # ARABIC LETTER TTEH + 0x067E: 0xF3, # ARABIC LETTER PEH + 0x0686: 0xF5, # ARABIC LETTER TCHEH + 0x0688: 0xF9, # ARABIC LETTER DDAL + 0x0691: 0xFA, # ARABIC LETTER RREH + 0x0698: 0xFE, # ARABIC LETTER JEH + 0x06A4: 0xF7, # ARABIC LETTER VEH + 0x06AF: 0xF8, # ARABIC LETTER GAF + 0x06BA: 0x8B, # ARABIC LETTER NOON 
GHUNNA + 0x06D2: 0xFF, # ARABIC LETTER YEH BARREE + 0x06D5: 0xF6, # ARABIC LETTER AE + 0x06F0: 0xB0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) + 0x06F1: 0xB1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) + 0x06F2: 0xB2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) + 0x06F3: 0xB3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) + 0x06F4: 0xB4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) + 0x06F5: 0xB5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) + 0x06F6: 0xB6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) + 0x06F7: 0xB7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) + 0x06F8: 0xB8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) + 0x06F9: 0xB9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) 0x2026: 0x93, # HORIZONTAL ELLIPSIS, right-left - 0x274a: 0xc0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left -} \ No newline at end of file + 0x274A: 0xC0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left +} + Index: mac_greek.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_greek.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- mac_greek.py 24 Oct 2005 12:07:48 -0000 1.6 +++ mac_greek.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL 
CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xb9' # 0x81 -> SUPERSCRIPT ONE u'\xb2' # 0x82 -> SUPERSCRIPT TWO @@ -170,12 +170,12 @@ u'\u0385' # 0x87 -> GREEK DIALYTIKA TONOS u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0384' # 0x8b -> GREEK TONOS - u'\xa8' # 0x8c -> DIAERESIS - u'\xe7' # 0x8d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8f -> LATIN SMALL LETTER E WITH GRAVE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0384' # 0x8B -> GREEK TONOS + u'\xa8' # 0x8C -> DIAERESIS + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xa3' # 0x92 -> POUND SIGN @@ -186,108 +186,108 @@ u'\xbd' # 0x97 -> VULGAR FRACTION ONE HALF u'\u2030' # 0x98 -> PER MILLE SIGN u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xa6' # 0x9b -> BROKEN BAR - u'\u20ac' # 0x9c 
-> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN - u'\xf9' # 0x9d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xa0 -> DAGGER - u'\u0393' # 0xa1 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xa2 -> GREEK CAPITAL LETTER DELTA - u'\u0398' # 0xa3 -> GREEK CAPITAL LETTER THETA - u'\u039b' # 0xa4 -> GREEK CAPITAL LETTER LAMDA - u'\u039e' # 0xa5 -> GREEK CAPITAL LETTER XI - u'\u03a0' # 0xa6 -> GREEK CAPITAL LETTER PI - u'\xdf' # 0xa7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u03a3' # 0xaa -> GREEK CAPITAL LETTER SIGMA - u'\u03aa' # 0xab -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\xa7' # 0xac -> SECTION SIGN - u'\u2260' # 0xad -> NOT EQUAL TO - u'\xb0' # 0xae -> DEGREE SIGN - u'\xb7' # 0xaf -> MIDDLE DOT - u'\u0391' # 0xb0 -> GREEK CAPITAL LETTER ALPHA - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xb4 -> YEN SIGN - u'\u0392' # 0xb5 -> GREEK CAPITAL LETTER BETA - u'\u0395' # 0xb6 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xb7 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xb8 -> GREEK CAPITAL LETTER ETA - u'\u0399' # 0xb9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xba -> GREEK CAPITAL LETTER KAPPA - u'\u039c' # 0xbb -> GREEK CAPITAL LETTER MU - u'\u03a6' # 0xbc -> GREEK CAPITAL LETTER PHI - u'\u03ab' # 0xbd -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03a8' # 0xbe -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xbf -> GREEK CAPITAL LETTER OMEGA - u'\u03ac' # 0xc0 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u039d' # 0xc1 -> GREEK CAPITAL LETTER NU - u'\xac' # 0xc2 -> NOT SIGN - u'\u039f' # 0xc3 -> GREEK CAPITAL LETTER OMICRON - u'\u03a1' # 0xc4 -> GREEK CAPITAL LETTER RHO - u'\u2248' # 0xc5 -> ALMOST EQUAL TO - u'\u03a4' # 0xc6 -> GREEK CAPITAL LETTER TAU - u'\xab' # 0xc7 -> LEFT-POINTING 
DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xc8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\u03a5' # 0xcb -> GREEK CAPITAL LETTER UPSILON - u'\u03a7' # 0xcc -> GREEK CAPITAL LETTER CHI - u'\u0386' # 0xcd -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0xce -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0153' # 0xcf -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xd0 -> EN DASH - u'\u2015' # 0xd1 -> HORIZONTAL BAR - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u0389' # 0xd7 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xd8 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0xd9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0xda -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u03ad' # 0xdb -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xdc -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xdd -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xde -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u038f' # 0xdf -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u03cd' # 0xe0 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03b1' # 0xe1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xe2 -> GREEK SMALL LETTER BETA - u'\u03c8' # 0xe3 -> GREEK SMALL LETTER PSI - u'\u03b4' # 0xe4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xe5 -> GREEK SMALL LETTER EPSILON - u'\u03c6' # 0xe6 -> GREEK SMALL LETTER PHI - u'\u03b3' # 0xe7 -> GREEK SMALL LETTER GAMMA - u'\u03b7' # 0xe8 -> GREEK SMALL LETTER ETA - u'\u03b9' # 0xe9 -> GREEK SMALL LETTER IOTA - u'\u03be' # 0xea -> GREEK SMALL LETTER XI - u'\u03ba' # 0xeb -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xec -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xed -> GREEK SMALL LETTER MU - u'\u03bd' # 0xee -> GREEK SMALL LETTER NU - u'\u03bf' # 0xef -> GREEK 
SMALL LETTER OMICRON - u'\u03c0' # 0xf0 -> GREEK SMALL LETTER PI - u'\u03ce' # 0xf1 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c1' # 0xf2 -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xf3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xf4 -> GREEK SMALL LETTER TAU - u'\u03b8' # 0xf5 -> GREEK SMALL LETTER THETA - u'\u03c9' # 0xf6 -> GREEK SMALL LETTER OMEGA - u'\u03c2' # 0xf7 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c7' # 0xf8 -> GREEK SMALL LETTER CHI - u'\u03c5' # 0xf9 -> GREEK SMALL LETTER UPSILON - u'\u03b6' # 0xfa -> GREEK SMALL LETTER ZETA - u'\u03ca' # 0xfb -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xfc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u0390' # 0xfd -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xfe -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\xad' # 0xff -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xa6' # 0x9B -> BROKEN BAR + u'\u20ac' # 0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\u0393' # 0xA1 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xA2 -> GREEK CAPITAL LETTER DELTA + u'\u0398' # 0xA3 -> GREEK CAPITAL LETTER THETA + u'\u039b' # 0xA4 -> GREEK CAPITAL LETTER LAMDA + u'\u039e' # 0xA5 -> GREEK CAPITAL LETTER XI + u'\u03a0' # 0xA6 -> GREEK CAPITAL LETTER PI + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u03a3' # 0xAA -> GREEK CAPITAL LETTER SIGMA + u'\u03aa' # 0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\xa7' # 0xAC -> SECTION SIGN + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xb0' # 0xAE -> DEGREE SIGN + u'\xb7' # 0xAF -> MIDDLE DOT + u'\u0391' # 0xB0 -> GREEK CAPITAL LETTER ALPHA + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> 
LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\u0392' # 0xB5 -> GREEK CAPITAL LETTER BETA + u'\u0395' # 0xB6 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xB7 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xB8 -> GREEK CAPITAL LETTER ETA + u'\u0399' # 0xB9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xBA -> GREEK CAPITAL LETTER KAPPA + u'\u039c' # 0xBB -> GREEK CAPITAL LETTER MU + u'\u03a6' # 0xBC -> GREEK CAPITAL LETTER PHI + u'\u03ab' # 0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03a8' # 0xBE -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xBF -> GREEK CAPITAL LETTER OMEGA + u'\u03ac' # 0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u039d' # 0xC1 -> GREEK CAPITAL LETTER NU + u'\xac' # 0xC2 -> NOT SIGN + u'\u039f' # 0xC3 -> GREEK CAPITAL LETTER OMICRON + u'\u03a1' # 0xC4 -> GREEK CAPITAL LETTER RHO + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u03a4' # 0xC6 -> GREEK CAPITAL LETTER TAU + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u03a5' # 0xCB -> GREEK CAPITAL LETTER UPSILON + u'\u03a7' # 0xCC -> GREEK CAPITAL LETTER CHI + u'\u0386' # 0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2015' # 0xD1 -> HORIZONTAL BAR + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u0389' # 0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ad' # 0xDB -> GREEK 
SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDC -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDD -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u038f' # 0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u03cd' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03c8' # 0xE3 -> GREEK SMALL LETTER PSI + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03c6' # 0xE6 -> GREEK SMALL LETTER PHI + u'\u03b3' # 0xE7 -> GREEK SMALL LETTER GAMMA + u'\u03b7' # 0xE8 -> GREEK SMALL LETTER ETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03be' # 0xEA -> GREEK SMALL LETTER XI + u'\u03ba' # 0xEB -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEC -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xED -> GREEK SMALL LETTER MU + u'\u03bd' # 0xEE -> GREEK SMALL LETTER NU + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03ce' # 0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c1' # 0xF2 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03b8' # 0xF5 -> GREEK SMALL LETTER THETA + u'\u03c9' # 0xF6 -> GREEK SMALL LETTER OMEGA + u'\u03c2' # 0xF7 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c7' # 0xF8 -> GREEK SMALL LETTER CHI + u'\u03c5' # 0xF9 -> GREEK SMALL LETTER UPSILON + u'\u03b6' # 0xFA -> GREEK SMALL LETTER ZETA + u'\u03ca' # 0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u0390' # 0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 
0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # 
QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN 
SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a3: 0x92, # POUND SIGN - 0x00a5: 0xb4, # YEN SIGN - 0x00a6: 0x9b, # BROKEN BAR - 0x00a7: 0xac, # SECTION SIGN - 0x00a8: 0x8c, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ad: 0xff, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00b0: 0xae, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b2: 0x82, # SUPERSCRIPT TWO - 0x00b3: 0x84, # SUPERSCRIPT THREE - 0x00b7: 0xaf, # MIDDLE DOT - 0x00b9: 0x81, # SUPERSCRIPT ONE - 0x00bb: 0xc8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bd: 0x97, # VULGAR FRACTION ONE HALF - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xa7, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00e2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0x8d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x8f, # LATIN SMALL LETTER E WITH 
GRAVE - 0x00e9: 0x8e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ee: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xd6, # DIVISION SIGN - 0x00f9: 0x9d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fb: 0x9e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0153: 0xcf, # LATIN SMALL LIGATURE OE - 0x0384: 0x8b, # GREEK TONOS + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A3: 0x92, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A6: 0x9B, # BROKEN BAR + 0x00A7: 0xAC, # SECTION SIGN + 0x00A8: 0x8C, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AD: 0xFF, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00B0: 0xAE, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B2: 0x82, # SUPERSCRIPT TWO + 0x00B3: 0x84, # SUPERSCRIPT THREE + 0x00B7: 0xAF, # MIDDLE DOT + 0x00B9: 0x81, # SUPERSCRIPT ONE + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BD: 0x97, # VULGAR FRACTION ONE HALF + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E7: 0x8D, # LATIN 
SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0384: 0x8B, # GREEK TONOS 0x0385: 0x87, # GREEK DIALYTIKA TONOS - 0x0386: 0xcd, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xce, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xd7, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0xd8, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0xd9, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0xda, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0xdf, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xfd, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xb0, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xb5, # GREEK CAPITAL LETTER BETA - 0x0393: 0xa1, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xa2, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xb6, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xb7, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xb8, # GREEK CAPITAL LETTER ETA - 0x0398: 0xa3, # GREEK CAPITAL LETTER THETA - 0x0399: 0xb9, # GREEK CAPITAL LETTER IOTA - 0x039a: 0xba, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0xa4, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0xbb, # GREEK CAPITAL LETTER MU - 0x039d: 0xc1, # GREEK CAPITAL LETTER NU - 0x039e: 0xa5, # GREEK CAPITAL LETTER XI - 0x039f: 0xc3, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0xa6, # GREEK CAPITAL LETTER PI - 0x03a1: 0xc4, # GREEK CAPITAL LETTER RHO - 0x03a3: 0xaa, # GREEK CAPITAL 
LETTER SIGMA - 0x03a4: 0xc6, # GREEK CAPITAL LETTER TAU - 0x03a5: 0xcb, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0xbc, # GREEK CAPITAL LETTER PHI - 0x03a7: 0xcc, # GREEK CAPITAL LETTER CHI - 0x03a8: 0xbe, # GREEK CAPITAL LETTER PSI - 0x03a9: 0xbf, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0xab, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0xbd, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0xc0, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0xdb, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0xdc, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0xdd, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b0: 0xfe, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03b1: 0xe1, # GREEK SMALL LETTER ALPHA - 0x03b2: 0xe2, # GREEK SMALL LETTER BETA - 0x03b3: 0xe7, # GREEK SMALL LETTER GAMMA - 0x03b4: 0xe4, # GREEK SMALL LETTER DELTA - 0x03b5: 0xe5, # GREEK SMALL LETTER EPSILON - 0x03b6: 0xfa, # GREEK SMALL LETTER ZETA - 0x03b7: 0xe8, # GREEK SMALL LETTER ETA - 0x03b8: 0xf5, # GREEK SMALL LETTER THETA - 0x03b9: 0xe9, # GREEK SMALL LETTER IOTA - 0x03ba: 0xeb, # GREEK SMALL LETTER KAPPA - 0x03bb: 0xec, # GREEK SMALL LETTER LAMDA - 0x03bc: 0xed, # GREEK SMALL LETTER MU - 0x03bd: 0xee, # GREEK SMALL LETTER NU - 0x03be: 0xea, # GREEK SMALL LETTER XI - 0x03bf: 0xef, # GREEK SMALL LETTER OMICRON - 0x03c0: 0xf0, # GREEK SMALL LETTER PI - 0x03c1: 0xf2, # GREEK SMALL LETTER RHO - 0x03c2: 0xf7, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0xf3, # GREEK SMALL LETTER SIGMA - 0x03c4: 0xf4, # GREEK SMALL LETTER TAU - 0x03c5: 0xf9, # GREEK SMALL LETTER UPSILON - 0x03c6: 0xe6, # GREEK SMALL LETTER PHI - 0x03c7: 0xf8, # GREEK SMALL LETTER CHI - 0x03c8: 0xe3, # GREEK SMALL LETTER PSI - 0x03c9: 0xf6, # GREEK SMALL LETTER OMEGA - 0x03ca: 0xfb, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0xfc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0xde, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0xe0, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0xf1, # GREEK 
SMALL LETTER OMEGA WITH TONOS - 0x2013: 0xd0, # EN DASH - 0x2015: 0xd1, # HORIZONTAL BAR - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x2020: 0xa0, # DAGGER + 0x0386: 0xCD, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0xCE, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0xD7, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A: 0xD8, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C: 0xD9, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E: 0xDA, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F: 0xDF, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0xFD, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0xB0, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0xB5, # GREEK CAPITAL LETTER BETA + 0x0393: 0xA1, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0xA2, # GREEK CAPITAL LETTER DELTA + 0x0395: 0xB6, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0xB7, # GREEK CAPITAL LETTER ZETA + 0x0397: 0xB8, # GREEK CAPITAL LETTER ETA + 0x0398: 0xA3, # GREEK CAPITAL LETTER THETA + 0x0399: 0xB9, # GREEK CAPITAL LETTER IOTA + 0x039A: 0xBA, # GREEK CAPITAL LETTER KAPPA + 0x039B: 0xA4, # GREEK CAPITAL LETTER LAMDA + 0x039C: 0xBB, # GREEK CAPITAL LETTER MU + 0x039D: 0xC1, # GREEK CAPITAL LETTER NU + 0x039E: 0xA5, # GREEK CAPITAL LETTER XI + 0x039F: 0xC3, # GREEK CAPITAL LETTER OMICRON + 0x03A0: 0xA6, # GREEK CAPITAL LETTER PI + 0x03A1: 0xC4, # GREEK CAPITAL LETTER RHO + 0x03A3: 0xAA, # GREEK CAPITAL LETTER SIGMA + 0x03A4: 0xC6, # GREEK CAPITAL LETTER TAU + 0x03A5: 0xCB, # GREEK CAPITAL LETTER UPSILON + 0x03A6: 0xBC, # GREEK CAPITAL LETTER PHI + 0x03A7: 0xCC, # GREEK CAPITAL LETTER CHI + 0x03A8: 0xBE, # GREEK CAPITAL LETTER PSI + 0x03A9: 0xBF, # GREEK CAPITAL LETTER OMEGA + 0x03AA: 0xAB, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB: 0xBD, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03AC: 0xC0, # GREEK SMALL LETTER ALPHA WITH TONOS + 
0x03AD: 0xDB, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03AE: 0xDC, # GREEK SMALL LETTER ETA WITH TONOS + 0x03AF: 0xDD, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03B0: 0xFE, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA + 0x03B2: 0xE2, # GREEK SMALL LETTER BETA + 0x03B3: 0xE7, # GREEK SMALL LETTER GAMMA + 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA + 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON + 0x03B6: 0xFA, # GREEK SMALL LETTER ZETA + 0x03B7: 0xE8, # GREEK SMALL LETTER ETA + 0x03B8: 0xF5, # GREEK SMALL LETTER THETA + 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA + 0x03BA: 0xEB, # GREEK SMALL LETTER KAPPA + 0x03BB: 0xEC, # GREEK SMALL LETTER LAMDA + 0x03BC: 0xED, # GREEK SMALL LETTER MU + 0x03BD: 0xEE, # GREEK SMALL LETTER NU + 0x03BE: 0xEA, # GREEK SMALL LETTER XI + 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON + 0x03C0: 0xF0, # GREEK SMALL LETTER PI + 0x03C1: 0xF2, # GREEK SMALL LETTER RHO + 0x03C2: 0xF7, # GREEK SMALL LETTER FINAL SIGMA + 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA + 0x03C4: 0xF4, # GREEK SMALL LETTER TAU + 0x03C5: 0xF9, # GREEK SMALL LETTER UPSILON + 0x03C6: 0xE6, # GREEK SMALL LETTER PHI + 0x03C7: 0xF8, # GREEK SMALL LETTER CHI + 0x03C8: 0xE3, # GREEK SMALL LETTER PSI + 0x03C9: 0xF6, # GREEK SMALL LETTER OMEGA + 0x03CA: 0xFB, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03CB: 0xFC, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03CC: 0xDE, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03CD: 0xE0, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03CE: 0xF1, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2013: 0xD0, # EN DASH + 0x2015: 0xD1, # HORIZONTAL BAR + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x2020: 0xA0, # DAGGER 0x2022: 0x96, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS 0x2030: 0x98, # PER MILLE SIGN - 0x20ac: 0x9c, # EURO SIGN # 
before Mac OS 9.2.2, was SOFT HYPHEN + 0x20AC: 0x9C, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN 0x2122: 0x93, # TRADE MARK SIGN - 0x2248: 0xc5, # ALMOST EQUAL TO - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO -} \ No newline at end of file + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO +} + Index: mac_iceland.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_iceland.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- mac_iceland.py 24 Oct 2005 12:07:48 -0000 1.6 +++ mac_iceland.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + 
u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA @@ -170,12 +170,12 @@ u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8b -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8c -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8f -> LATIN SMALL LETTER E WITH GRAVE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE @@ -186,108 +186,108 @@ u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN 
SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9b -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xdd' # 0xa0 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xb0' # 0xa1 -> DEGREE SIGN - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa7' # 0xa4 -> SECTION SIGN - u'\u2022' # 0xa5 -> BULLET - u'\xb6' # 0xa6 -> PILCROW SIGN - u'\xdf' # 0xa7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u2122' # 0xaa -> TRADE MARK SIGN - u'\xb4' # 0xab -> ACUTE ACCENT - u'\xa8' # 0xac -> DIAERESIS - u'\u2260' # 0xad -> NOT EQUAL TO - u'\xc6' # 0xae -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xaf -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xb0 -> INFINITY - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xb4 -> YEN SIGN - u'\xb5' # 0xb5 -> MICRO SIGN - u'\u2202' # 0xb6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xb7 -> N-ARY SUMMATION - u'\u220f' # 0xb8 -> N-ARY PRODUCT - u'\u03c0' # 0xb9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xba -> INTEGRAL - u'\xaa' # 0xbb -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xbc -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xbd -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xbe -> LATIN SMALL LETTER AE - u'\xf8' # 0xbf -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xc0 -> INVERTED QUESTION MARK - u'\xa1' # 0xc1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xc2 -> NOT SIGN - u'\u221a' # 0xc3 -> SQUARE ROOT - u'\u0192' # 0xc4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xc5 -> ALMOST EQUAL TO - u'\u2206' # 0xc6 -> INCREMENT - u'\xab' # 0xc7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 
0xc8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\xc0' # 0xcb -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xcc -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xcd -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xce -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xcf -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xd0 -> EN DASH - u'\u2014' # 0xd1 -> EM DASH - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u25ca' # 0xd7 -> LOZENGE - u'\xff' # 0xd8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xd9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xda -> FRACTION SLASH - u'\u20ac' # 0xdb -> EURO SIGN - u'\xd0' # 0xdc -> LATIN CAPITAL LETTER ETH - u'\xf0' # 0xdd -> LATIN SMALL LETTER ETH - u'\xde' # 0xde -> LATIN CAPITAL LETTER THORN - u'\xfe' # 0xdf -> LATIN SMALL LETTER THORN - u'\xfd' # 0xe0 -> LATIN SMALL LETTER Y WITH ACUTE - u'\xb7' # 0xe1 -> MIDDLE DOT - u'\u201a' # 0xe2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xe3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xe4 -> PER MILLE SIGN - u'\xc2' # 0xe5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xe6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xe7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xe8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xe9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xea -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xeb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xec -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xed -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xf0 -> Apple logo - u'\xd2' # 0xf1 -> LATIN CAPITAL LETTER O WITH GRAVE 
- u'\xda' # 0xf2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xf3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xf4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xf5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xf6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xf7 -> SMALL TILDE - u'\xaf' # 0xf8 -> MACRON - u'\u02d8' # 0xf9 -> BREVE - u'\u02d9' # 0xfa -> DOT ABOVE - u'\u02da' # 0xfb -> RING ABOVE - u'\xb8' # 0xfc -> CEDILLA - u'\u02dd' # 0xfd -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xfe -> OGONEK - u'\u02c7' # 0xff -> CARON + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xdd' # 0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD 
-> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\xd0' # 0xDC -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0xDD -> LATIN SMALL LETTER ETH + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xfe' # 0xDF -> LATIN SMALL LETTER THORN + u'\xfd' # 0xE0 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + 
u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # 
CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL 
LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a1: 0xc1, # INVERTED 
EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a5: 0xb4, # YEN SIGN - 0x00a7: 0xa4, # SECTION SIGN - 0x00a8: 0xac, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xbb, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00af: 0xf8, # MACRON - 0x00b0: 0xa1, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b4: 0xab, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xa6, # PILCROW SIGN - 0x00b7: 0xe1, # MIDDLE DOT - 0x00b8: 0xfc, # CEDILLA - 0x00ba: 0xbc, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xc8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bf: 0xc0, # INVERTED QUESTION MARK - 0x00c0: 0xcb, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xe7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xe5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xcc, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xae, # LATIN CAPITAL LETTER AE - 0x00c7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xe9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xe6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xe8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xed, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xea, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xeb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xec, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0xdc, # LATIN CAPITAL LETTER ETH - 0x00d1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xf1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xcd, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0xaf, # LATIN CAPITAL 
LETTER O WITH STROKE - 0x00d9: 0xf4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xf2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xf3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0xa0, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0xde, # LATIN CAPITAL LETTER THORN - 0x00df: 0xa7, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x8b, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x8c, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xbe, # LATIN SMALL LETTER AE - 0x00e7: 0x8d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x8f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x8e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0xdd, # LATIN SMALL LETTER ETH - 0x00f1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x9b, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xd6, # DIVISION SIGN - 0x00f8: 0xbf, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x9d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x9c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x9e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0xe0, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0xdf, # LATIN SMALL LETTER THORN - 0x00ff: 0xd8, # LATIN SMALL LETTER Y WITH DIAERESIS - 
0x0131: 0xf5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xce, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xcf, # LATIN SMALL LIGATURE OE - 0x0178: 0xd9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xc4, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0xf6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02c7: 0xff, # CARON - 0x02d8: 0xf9, # BREVE - 0x02d9: 0xfa, # DOT ABOVE - 0x02da: 0xfb, # RING ABOVE - 0x02db: 0xfe, # OGONEK - 0x02dc: 0xf7, # SMALL TILDE - 0x02dd: 0xfd, # DOUBLE ACUTE ACCENT - 0x03a9: 0xbd, # GREEK CAPITAL LETTER OMEGA - 0x03c0: 0xb9, # GREEK SMALL LETTER PI - 0x2013: 0xd0, # EN DASH - 0x2014: 0xd1, # EM DASH - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0xe2, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xe3, # DOUBLE LOW-9 QUOTATION MARK - 0x2022: 0xa5, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS - 0x2030: 0xe4, # PER MILLE SIGN - 0x2044: 0xda, # FRACTION SLASH - 0x20ac: 0xdb, # EURO SIGN - 0x2122: 0xaa, # TRADE MARK SIGN - 0x2202: 0xb6, # PARTIAL DIFFERENTIAL - 0x2206: 0xc6, # INCREMENT - 0x220f: 0xb8, # N-ARY PRODUCT - 0x2211: 0xb7, # N-ARY SUMMATION - 0x221a: 0xc3, # SQUARE ROOT - 0x221e: 0xb0, # INFINITY - 0x222b: 0xba, # INTEGRAL - 0x2248: 0xc5, # ALMOST EQUAL TO - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO - 0x25ca: 0xd7, # LOZENGE - 0xf8ff: 0xf0, # Apple logo -} \ No newline at end of file + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 
0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0: 0xDC, # LATIN CAPITAL LETTER ETH + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # 
LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD: 0xA0, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F0: 0xDD, # LATIN SMALL LETTER ETH + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FD: 0xE0, # LATIN SMALL LETTER Y WITH ACUTE + 0x00FE: 0xDF, # LATIN SMALL LETTER THORN + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # 
LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8FF: 0xF0, # Apple logo +} + Index: mac_roman.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_roman.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- mac_roman.py 24 Oct 2005 12:07:49 -0000 1.6 +++ mac_roman.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A 
-> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA @@ -170,12 +170,12 @@ u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8b -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8c -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8f -> LATIN SMALL LETTER E WITH GRAVE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE @@ -186,108 +186,108 @@ u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN 
SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9b -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xa0 -> DAGGER - u'\xb0' # 0xa1 -> DEGREE SIGN - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa7' # 0xa4 -> SECTION SIGN - u'\u2022' # 0xa5 -> BULLET - u'\xb6' # 0xa6 -> PILCROW SIGN - u'\xdf' # 0xa7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u2122' # 0xaa -> TRADE MARK SIGN - u'\xb4' # 0xab -> ACUTE ACCENT - u'\xa8' # 0xac -> DIAERESIS - u'\u2260' # 0xad -> NOT EQUAL TO - u'\xc6' # 0xae -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xaf -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xb0 -> INFINITY - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xb4 -> YEN SIGN - u'\xb5' # 0xb5 -> MICRO SIGN - u'\u2202' # 0xb6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xb7 -> N-ARY SUMMATION - u'\u220f' # 0xb8 -> N-ARY PRODUCT - u'\u03c0' # 0xb9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xba -> INTEGRAL - u'\xaa' # 0xbb -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xbc -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xbd -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xbe -> LATIN SMALL LETTER AE - u'\xf8' # 0xbf -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xc0 -> INVERTED QUESTION MARK - u'\xa1' # 0xc1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xc2 -> NOT SIGN - u'\u221a' # 0xc3 -> SQUARE ROOT - u'\u0192' # 0xc4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xc5 -> ALMOST EQUAL TO - u'\u2206' # 0xc6 -> INCREMENT - u'\xab' # 0xc7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xc8 -> RIGHT-POINTING 
DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\xc0' # 0xcb -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xcc -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xcd -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xce -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xcf -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xd0 -> EN DASH - u'\u2014' # 0xd1 -> EM DASH - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u25ca' # 0xd7 -> LOZENGE - u'\xff' # 0xd8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xd9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xda -> FRACTION SLASH - u'\u20ac' # 0xdb -> EURO SIGN - u'\u2039' # 0xdc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xdd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufb01' # 0xde -> LATIN SMALL LIGATURE FI - u'\ufb02' # 0xdf -> LATIN SMALL LIGATURE FL - u'\u2021' # 0xe0 -> DOUBLE DAGGER - u'\xb7' # 0xe1 -> MIDDLE DOT - u'\u201a' # 0xe2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xe3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xe4 -> PER MILLE SIGN - u'\xc2' # 0xe5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xe6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xe7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xe8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xe9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xea -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xeb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xec -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xed -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xf0 -> Apple logo - u'\xd2' # 0xf1 -> LATIN CAPITAL LETTER O WITH 
GRAVE - u'\xda' # 0xf2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xf3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xf4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xf5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xf6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xf7 -> SMALL TILDE - u'\xaf' # 0xf8 -> MACRON - u'\u02d8' # 0xf9 -> BREVE - u'\u02d9' # 0xfa -> DOT ABOVE - u'\u02da' # 0xfb -> RING ABOVE - u'\xb8' # 0xfc -> CEDILLA - u'\u02dd' # 0xfd -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xfe -> OGONEK - u'\u02c7' # 0xff -> CARON + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL 
LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufb01' # 0xDE -> LATIN SMALL LIGATURE FI + u'\ufb02' # 0xDF -> LATIN SMALL LIGATURE FL + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH 
DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 
0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN 
CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a1: 0xc1, # 
INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a5: 0xb4, # YEN SIGN - 0x00a7: 0xa4, # SECTION SIGN - 0x00a8: 0xac, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xbb, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00af: 0xf8, # MACRON - 0x00b0: 0xa1, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b4: 0xab, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xa6, # PILCROW SIGN - 0x00b7: 0xe1, # MIDDLE DOT - 0x00b8: 0xfc, # CEDILLA - 0x00ba: 0xbc, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xc8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bf: 0xc0, # INVERTED QUESTION MARK - 0x00c0: 0xcb, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xe7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xe5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xcc, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xae, # LATIN CAPITAL LETTER AE - 0x00c7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xe9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xe6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xe8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xed, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xea, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xeb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xec, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xf1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xcd, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0xaf, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xf4, # 
LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xf2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xf3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xa7, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x8b, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x8c, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xbe, # LATIN SMALL LETTER AE - 0x00e7: 0x8d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x8f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x8e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x9b, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xd6, # DIVISION SIGN - 0x00f8: 0xbf, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x9d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x9c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x9e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xd8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0xf5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xce, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xcf, # LATIN SMALL LIGATURE OE - 0x0178: 0xd9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xc4, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0xf6, # MODIFIER LETTER 
CIRCUMFLEX ACCENT - 0x02c7: 0xff, # CARON - 0x02d8: 0xf9, # BREVE - 0x02d9: 0xfa, # DOT ABOVE - 0x02da: 0xfb, # RING ABOVE - 0x02db: 0xfe, # OGONEK - 0x02dc: 0xf7, # SMALL TILDE - 0x02dd: 0xfd, # DOUBLE ACUTE ACCENT - 0x03a9: 0xbd, # GREEK CAPITAL LETTER OMEGA - 0x03c0: 0xb9, # GREEK SMALL LETTER PI - 0x2013: 0xd0, # EN DASH - 0x2014: 0xd1, # EM DASH - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0xe2, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xe3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xa0, # DAGGER - 0x2021: 0xe0, # DOUBLE DAGGER - 0x2022: 0xa5, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS - 0x2030: 0xe4, # PER MILLE SIGN - 0x2039: 0xdc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0xdd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xda, # FRACTION SLASH - 0x20ac: 0xdb, # EURO SIGN - 0x2122: 0xaa, # TRADE MARK SIGN - 0x2202: 0xb6, # PARTIAL DIFFERENTIAL - 0x2206: 0xc6, # INCREMENT - 0x220f: 0xb8, # N-ARY PRODUCT - 0x2211: 0xb7, # N-ARY SUMMATION - 0x221a: 0xc3, # SQUARE ROOT - 0x221e: 0xb0, # INFINITY - 0x222b: 0xba, # INTEGRAL - 0x2248: 0xc5, # ALMOST EQUAL TO - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO - 0x25ca: 0xd7, # LOZENGE - 0xf8ff: 0xf0, # Apple logo - 0xfb01: 0xde, # LATIN SMALL LIGATURE FI - 0xfb02: 0xdf, # LATIN SMALL LIGATURE FL -} \ No newline at end of file + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 
0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 
0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE ACUTE 
ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2021: 0xE0, # DOUBLE DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8FF: 0xF0, # Apple logo + 0xFB01: 0xDE, # LATIN SMALL LIGATURE FI + 0xFB02: 0xDF, # LATIN SMALL LIGATURE FL +} + Index: mac_romanian.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_romanian.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- mac_romanian.py 24 Oct 2005 12:07:49 -0000 1.2 +++ mac_romanian.py 24 Oct 2005 12:14:59 -0000 1.3 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A 
-> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' # 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' 
# 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 
0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA @@ -170,12 +170,12 @@ u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8b -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8c -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8f -> LATIN SMALL LETTER E WITH GRAVE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE @@ -186,108 +186,108 @@ u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN 
SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9b -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xa0 -> DAGGER - u'\xb0' # 0xa1 -> DEGREE SIGN - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa7' # 0xa4 -> SECTION SIGN - u'\u2022' # 0xa5 -> BULLET - u'\xb6' # 0xa6 -> PILCROW SIGN - u'\xdf' # 0xa7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u2122' # 0xaa -> TRADE MARK SIGN - u'\xb4' # 0xab -> ACUTE ACCENT - u'\xa8' # 0xac -> DIAERESIS - u'\u2260' # 0xad -> NOT EQUAL TO - u'\u0102' # 0xae -> LATIN CAPITAL LETTER A WITH BREVE - u'\u0218' # 0xaf -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\u221e' # 0xb0 -> INFINITY - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xb4 -> YEN SIGN - u'\xb5' # 0xb5 -> MICRO SIGN - u'\u2202' # 0xb6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xb7 -> N-ARY SUMMATION - u'\u220f' # 0xb8 -> N-ARY PRODUCT - u'\u03c0' # 0xb9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xba -> INTEGRAL - u'\xaa' # 0xbb -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xbc -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xbd -> GREEK CAPITAL LETTER OMEGA - u'\u0103' # 0xbe -> LATIN SMALL LETTER A WITH BREVE - u'\u0219' # 0xbf -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\xbf' # 0xc0 -> INVERTED QUESTION MARK - u'\xa1' # 0xc1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xc2 -> NOT SIGN - u'\u221a' # 0xc3 -> SQUARE ROOT - u'\u0192' # 0xc4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xc5 -> ALMOST EQUAL TO - u'\u2206' # 0xc6 -> INCREMENT - 
u'\xab' # 0xc7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xc8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\xc0' # 0xcb -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xcc -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xcd -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xce -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xcf -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xd0 -> EN DASH - u'\u2014' # 0xd1 -> EM DASH - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u25ca' # 0xd7 -> LOZENGE - u'\xff' # 0xd8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xd9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xda -> FRACTION SLASH - u'\u20ac' # 0xdb -> EURO SIGN - u'\u2039' # 0xdc -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xdd -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u021a' # 0xde -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u021b' # 0xdf -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u2021' # 0xe0 -> DOUBLE DAGGER - u'\xb7' # 0xe1 -> MIDDLE DOT - u'\u201a' # 0xe2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xe3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xe4 -> PER MILLE SIGN - u'\xc2' # 0xe5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xe6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xe7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xe8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xe9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xea -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xeb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xec -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xed -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xee 
-> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xf0 -> Apple logo - u'\xd2' # 0xf1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xf2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xf3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xf4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xf5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xf6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xf7 -> SMALL TILDE - u'\xaf' # 0xf8 -> MACRON - u'\u02d8' # 0xf9 -> BREVE - u'\u02d9' # 0xfa -> DOT ABOVE - u'\u02da' # 0xfb -> RING ABOVE - u'\xb8' # 0xfc -> CEDILLA - u'\u02dd' # 0xfd -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xfe -> OGONEK - u'\u02c7' # 0xff -> CARON + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0102' # 0xAE -> LATIN CAPITAL LETTER A WITH BREVE + u'\u0218' # 0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 
-> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\u0103' # 0xBE -> LATIN SMALL LETTER A WITH BREVE + u'\u0219' # 0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + u'\u021b' # 0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + 
u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL 
CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER 
J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN 
SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a1: 0xc1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a5: 0xb4, # YEN SIGN - 0x00a7: 0xa4, # SECTION SIGN - 0x00a8: 0xac, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xbb, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00af: 0xf8, # MACRON - 0x00b0: 0xa1, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b4: 0xab, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xa6, # PILCROW SIGN - 0x00b7: 0xe1, # MIDDLE DOT - 0x00b8: 0xfc, # CEDILLA - 0x00ba: 0xbc, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xc8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bf: 0xc0, # INVERTED QUESTION MARK - 0x00c0: 0xcb, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xe7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xe5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xcc, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xe9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xe6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xe8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xed, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xea, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xeb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xec, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xf1, # LATIN CAPITAL 
LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xcd, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d9: 0xf4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xf2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xf3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xa7, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x8b, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x8c, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e7: 0x8d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x8f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x8e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x9b, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xd6, # DIVISION SIGN - 0x00f9: 0x9d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x9c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x9e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xd8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xae, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xbe, # LATIN SMALL LETTER A WITH BREVE - 0x0131: 
0xf5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xce, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xcf, # LATIN SMALL LIGATURE OE - 0x0178: 0xd9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xc4, # LATIN SMALL LETTER F WITH HOOK - 0x0218: 0xaf, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - 0x0219: 0xbf, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - 0x021a: 0xde, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - 0x021b: 0xdf, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - 0x02c6: 0xf6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02c7: 0xff, # CARON - 0x02d8: 0xf9, # BREVE - 0x02d9: 0xfa, # DOT ABOVE - 0x02da: 0xfb, # RING ABOVE - 0x02db: 0xfe, # OGONEK - 0x02dc: 0xf7, # SMALL TILDE - 0x02dd: 0xfd, # DOUBLE ACUTE ACCENT - 0x03a9: 0xbd, # GREEK CAPITAL LETTER OMEGA - 0x03c0: 0xb9, # GREEK SMALL LETTER PI - 0x2013: 0xd0, # EN DASH - 0x2014: 0xd1, # EM DASH - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0xe2, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xe3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xa0, # DAGGER - 0x2021: 0xe0, # DOUBLE DAGGER - 0x2022: 0xa5, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS - 0x2030: 0xe4, # PER MILLE SIGN - 0x2039: 0xdc, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203a: 0xdd, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xda, # FRACTION SLASH - 0x20ac: 0xdb, # EURO SIGN - 0x2122: 0xaa, # TRADE MARK SIGN - 0x2202: 0xb6, # PARTIAL DIFFERENTIAL - 0x2206: 0xc6, # INCREMENT - 0x220f: 0xb8, # N-ARY PRODUCT - 0x2211: 0xb7, # N-ARY SUMMATION - 0x221a: 0xc3, # SQUARE ROOT - 0x221e: 0xb0, # INFINITY - 0x222b: 0xba, # INTEGRAL - 0x2248: 0xc5, # ALMOST EQUAL TO - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO - 0x25ca: 0xd7, # LOZENGE - 0xf8ff: 
0xf0, # Apple logo -} \ No newline at end of file + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN 
CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0102: 0xAE, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0xBE, # LATIN SMALL LETTER A WITH BREVE + 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I + 
0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x0218: 0xAF, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + 0x0219: 0xBF, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + 0x021B: 0xDF, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2021: 0xE0, # DOUBLE DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x2044: 0xDA, # FRACTION SLASH + 0x20AC: 0xDB, # EURO SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8FF: 0xF0, # Apple logo +} + Index: 
mac_turkish.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/mac_turkish.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- mac_turkish.py 24 Oct 2005 12:07:49 -0000 1.6 +++ mac_turkish.py 24 Oct 2005 12:14:59 -0000 1.7 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0a -> CONTROL CHARACTER - u'\x0b' # 0x0b -> CONTROL CHARACTER - u'\x0c' # 0x0c -> CONTROL CHARACTER - u'\r' # 0x0d -> CONTROL CHARACTER - u'\x0e' # 0x0e -> CONTROL CHARACTER - u'\x0f' # 0x0f -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1a -> CONTROL CHARACTER - u'\x1b' # 0x1b -> CONTROL CHARACTER - u'\x1c' # 0x1c -> CONTROL CHARACTER - u'\x1d' # 0x1d -> CONTROL CHARACTER - u'\x1e' # 0x1e -> CONTROL CHARACTER - u'\x1f' # 0x1f -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> CONTROL CHARACTER + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA @@ -170,12 +170,12 @@ u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8b -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8c -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8d -> LATIN SMALL 
LETTER C WITH CEDILLA - u'\xe9' # 0x8e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8f -> LATIN SMALL LETTER E WITH GRAVE + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE @@ -186,108 +186,108 @@ u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9b -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9f -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xa0 -> DAGGER - u'\xb0' # 0xa1 -> DEGREE SIGN - u'\xa2' # 0xa2 -> CENT SIGN - u'\xa3' # 0xa3 -> POUND SIGN - u'\xa7' # 0xa4 -> SECTION SIGN - u'\u2022' # 0xa5 -> BULLET - u'\xb6' # 0xa6 -> PILCROW SIGN - u'\xdf' # 0xa7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xa8 -> REGISTERED SIGN - u'\xa9' # 0xa9 -> COPYRIGHT SIGN - u'\u2122' # 0xaa -> TRADE MARK SIGN - u'\xb4' # 0xab -> ACUTE ACCENT - u'\xa8' # 0xac -> DIAERESIS - u'\u2260' # 0xad -> NOT EQUAL TO - u'\xc6' # 0xae -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xaf -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xb0 -> INFINITY - u'\xb1' # 0xb1 -> PLUS-MINUS SIGN - u'\u2264' # 0xb2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xb3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xb4 -> YEN SIGN - u'\xb5' # 0xb5 -> MICRO SIGN - u'\u2202' # 0xb6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xb7 -> N-ARY SUMMATION - u'\u220f' # 0xb8 
-> N-ARY PRODUCT - u'\u03c0' # 0xb9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xba -> INTEGRAL - u'\xaa' # 0xbb -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xbc -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xbd -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xbe -> LATIN SMALL LETTER AE - u'\xf8' # 0xbf -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xc0 -> INVERTED QUESTION MARK - u'\xa1' # 0xc1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xc2 -> NOT SIGN - u'\u221a' # 0xc3 -> SQUARE ROOT - u'\u0192' # 0xc4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xc5 -> ALMOST EQUAL TO - u'\u2206' # 0xc6 -> INCREMENT - u'\xab' # 0xc7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xc8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xc9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xca -> NO-BREAK SPACE - u'\xc0' # 0xcb -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xcc -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xcd -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xce -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xcf -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xd0 -> EN DASH - u'\u2014' # 0xd1 -> EM DASH - u'\u201c' # 0xd2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xd3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xd4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xd5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xd6 -> DIVISION SIGN - u'\u25ca' # 0xd7 -> LOZENGE - u'\xff' # 0xd8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xd9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u011e' # 0xda -> LATIN CAPITAL LETTER G WITH BREVE - u'\u011f' # 0xdb -> LATIN SMALL LETTER G WITH BREVE - u'\u0130' # 0xdc -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u0131' # 0xdd -> LATIN SMALL LETTER DOTLESS I - u'\u015e' # 0xde -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u015f' # 0xdf -> LATIN SMALL LETTER S WITH CEDILLA - u'\u2021' # 0xe0 -> DOUBLE DAGGER - u'\xb7' # 0xe1 -> MIDDLE DOT - u'\u201a' # 0xe2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xe3 -> DOUBLE LOW-9 QUOTATION 
MARK - u'\u2030' # 0xe4 -> PER MILLE SIGN - u'\xc2' # 0xe5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xe6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xe7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xe8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xe9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xea -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xeb -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xec -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xed -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xef -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xf0 -> Apple logo - u'\xd2' # 0xf1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xf2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xf3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xf4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\uf8a0' # 0xf5 -> undefined1 - u'\u02c6' # 0xf6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xf7 -> SMALL TILDE - u'\xaf' # 0xf8 -> MACRON - u'\u02d8' # 0xf9 -> BREVE - u'\u02d9' # 0xfa -> DOT ABOVE - u'\u02da' # 0xfb -> RING ABOVE - u'\xb8' # 0xfc -> CEDILLA - u'\u02dd' # 0xfd -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xfe -> OGONEK - u'\u02c7' # 0xff -> CARON + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 
0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH 
DIAERESIS + u'\u011e' # 0xDA -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0xDB -> LATIN SMALL LETTER G WITH BREVE + u'\u0130' # 0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u0131' # 0xDD -> LATIN SMALL LETTER DOTLESS I + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0xDF -> LATIN SMALL LETTER S WITH CEDILLA + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\uf8a0' # 0xF5 -> undefined1 + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON ) ### Encoding Map @@ -303,12 +303,12 @@ 0x0007: 0x07, # CONTROL CHARACTER 0x0008: 0x08, # CONTROL CHARACTER 0x0009: 0x09, # CONTROL CHARACTER - 0x000a: 0x0a, # CONTROL CHARACTER - 0x000b: 0x0b, # CONTROL 
CHARACTER - 0x000c: 0x0c, # CONTROL CHARACTER - 0x000d: 0x0d, # CONTROL CHARACTER - 0x000e: 0x0e, # CONTROL CHARACTER - 0x000f: 0x0f, # CONTROL CHARACTER + 0x000A: 0x0A, # CONTROL CHARACTER + 0x000B: 0x0B, # CONTROL CHARACTER + 0x000C: 0x0C, # CONTROL CHARACTER + 0x000D: 0x0D, # CONTROL CHARACTER + 0x000E: 0x0E, # CONTROL CHARACTER + 0x000F: 0x0F, # CONTROL CHARACTER 0x0010: 0x10, # CONTROL CHARACTER 0x0011: 0x11, # CONTROL CHARACTER 0x0012: 0x12, # CONTROL CHARACTER @@ -319,12 +319,12 @@ 0x0017: 0x17, # CONTROL CHARACTER 0x0018: 0x18, # CONTROL CHARACTER 0x0019: 0x19, # CONTROL CHARACTER - 0x001a: 0x1a, # CONTROL CHARACTER - 0x001b: 0x1b, # CONTROL CHARACTER - 0x001c: 0x1c, # CONTROL CHARACTER - 0x001d: 0x1d, # CONTROL CHARACTER - 0x001e: 0x1e, # CONTROL CHARACTER - 0x001f: 0x1f, # CONTROL CHARACTER + 0x001A: 0x1A, # CONTROL CHARACTER + 0x001B: 0x1B, # CONTROL CHARACTER + 0x001C: 0x1C, # CONTROL CHARACTER + 0x001D: 0x1D, # CONTROL CHARACTER + 0x001E: 0x1E, # CONTROL CHARACTER + 0x001F: 0x1F, # CONTROL CHARACTER 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # 
LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL 
LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,138 +415,139 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # CONTROL CHARACTER - 0x00a0: 0xca, # NO-BREAK SPACE - 0x00a1: 0xc1, # INVERTED EXCLAMATION MARK - 0x00a2: 0xa2, # CENT SIGN - 0x00a3: 0xa3, # POUND SIGN - 0x00a5: 0xb4, # YEN SIGN - 0x00a7: 0xa4, # SECTION SIGN - 0x00a8: 0xac, # DIAERESIS - 0x00a9: 0xa9, # COPYRIGHT SIGN - 0x00aa: 0xbb, # FEMININE ORDINAL INDICATOR - 0x00ab: 0xc7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0xc2, # NOT SIGN - 0x00ae: 0xa8, # REGISTERED SIGN - 0x00af: 0xf8, # MACRON - 0x00b0: 0xa1, # DEGREE SIGN - 0x00b1: 0xb1, # PLUS-MINUS SIGN - 0x00b4: 0xab, # ACUTE ACCENT - 0x00b5: 0xb5, # MICRO SIGN - 0x00b6: 0xa6, # PILCROW SIGN - 0x00b7: 0xe1, # MIDDLE DOT - 0x00b8: 0xfc, # CEDILLA - 0x00ba: 0xbc, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0xc8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bf: 0xc0, # INVERTED QUESTION MARK - 0x00c0: 0xcb, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0xe7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0xe5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0xcc, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0xae, # LATIN CAPITAL LETTER AE - 0x00c7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0xe9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x83, # LATIN 
CAPITAL LETTER E WITH ACUTE - 0x00ca: 0xe6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0xe8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0xed, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0xea, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0xeb, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0xec, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0xf1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0xef, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0xcd, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0xaf, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0xf4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0xf2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0xf3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0xa7, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x8b, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x8a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x8c, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0xbe, # LATIN SMALL LETTER AE - 0x00e7: 0x8d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x8f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x8e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x99, # LATIN SMALL 
LETTER O WITH CIRCUMFLEX - 0x00f5: 0x9b, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x9a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0xd6, # DIVISION SIGN - 0x00f8: 0xbf, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x9d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x9c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x9e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x9f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0xd8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011e: 0xda, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0xdb, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xdc, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xdd, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xce, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xcf, # LATIN SMALL LIGATURE OE - 0x015e: 0xde, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0xdf, # LATIN SMALL LETTER S WITH CEDILLA - 0x0178: 0xd9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xc4, # LATIN SMALL LETTER F WITH HOOK - 0x02c6: 0xf6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02c7: 0xff, # CARON - 0x02d8: 0xf9, # BREVE - 0x02d9: 0xfa, # DOT ABOVE - 0x02da: 0xfb, # RING ABOVE - 0x02db: 0xfe, # OGONEK - 0x02dc: 0xf7, # SMALL TILDE - 0x02dd: 0xfd, # DOUBLE ACUTE ACCENT - 0x03a9: 0xbd, # GREEK CAPITAL LETTER OMEGA - 0x03c0: 0xb9, # GREEK SMALL LETTER PI - 0x2013: 0xd0, # EN DASH - 0x2014: 0xd1, # EM DASH - 0x2018: 0xd4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xd5, # RIGHT SINGLE QUOTATION MARK - 0x201a: 0xe2, # SINGLE LOW-9 QUOTATION MARK - 0x201c: 0xd2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0xd3, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0xe3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xa0, # DAGGER - 0x2021: 0xe0, # DOUBLE DAGGER - 0x2022: 0xa5, # BULLET - 0x2026: 0xc9, # HORIZONTAL ELLIPSIS - 0x2030: 0xe4, # PER MILLE SIGN - 0x2122: 0xaa, # TRADE MARK SIGN - 0x2202: 0xb6, # PARTIAL DIFFERENTIAL - 0x2206: 0xc6, # INCREMENT - 0x220f: 0xb8, # N-ARY PRODUCT - 0x2211: 0xb7, # N-ARY SUMMATION - 0x221a: 0xc3, # SQUARE ROOT - 0x221e: 
0xb0, # INFINITY - 0x222b: 0xba, # INTEGRAL - 0x2248: 0xc5, # ALMOST EQUAL TO - 0x2260: 0xad, # NOT EQUAL TO - 0x2264: 0xb2, # LESS-THAN OR EQUAL TO - 0x2265: 0xb3, # GREATER-THAN OR EQUAL TO - 0x25ca: 0xd7, # LOZENGE - 0xf8a0: 0xf5, # undefined1 - 0xf8ff: 0xf0, # Apple logo -} \ No newline at end of file + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # CONTROL CHARACTER + 0x00A0: 0xCA, # NO-BREAK SPACE + 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK + 0x00A2: 0xA2, # CENT SIGN + 0x00A3: 0xA3, # POUND SIGN + 0x00A5: 0xB4, # YEN SIGN + 0x00A7: 0xA4, # SECTION SIGN + 0x00A8: 0xAC, # DIAERESIS + 0x00A9: 0xA9, # COPYRIGHT SIGN + 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR + 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00AC: 0xC2, # NOT SIGN + 0x00AE: 0xA8, # REGISTERED SIGN + 0x00AF: 0xF8, # MACRON + 0x00B0: 0xA1, # DEGREE SIGN + 0x00B1: 0xB1, # PLUS-MINUS SIGN + 0x00B4: 0xAB, # ACUTE ACCENT + 0x00B5: 0xB5, # MICRO SIGN + 0x00B6: 0xA6, # PILCROW SIGN + 0x00B7: 0xE1, # MIDDLE DOT + 0x00B8: 0xFC, # CEDILLA + 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR + 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00BF: 0xC0, # INVERTED QUESTION MARK + 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE + 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE + 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH 
GRAVE + 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE + 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE + 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE + 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S + 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE + 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE + 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE + 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00E6: 0xBE, # LATIN SMALL LETTER AE + 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA + 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE + 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE + 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE + 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE + 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE + 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE + 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE + 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE + 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00F7: 0xD6, # DIVISION SIGN + 0x00F8: 0xBF, # LATIN 
SMALL LETTER O WITH STROKE + 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE + 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE + 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011E: 0xDA, # LATIN CAPITAL LETTER G WITH BREVE + 0x011F: 0xDB, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0xDC, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0xDD, # LATIN SMALL LETTER DOTLESS I + 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE + 0x0153: 0xCF, # LATIN SMALL LIGATURE OE + 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015F: 0xDF, # LATIN SMALL LETTER S WITH CEDILLA + 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK + 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x02C7: 0xFF, # CARON + 0x02D8: 0xF9, # BREVE + 0x02D9: 0xFA, # DOT ABOVE + 0x02DA: 0xFB, # RING ABOVE + 0x02DB: 0xFE, # OGONEK + 0x02DC: 0xF7, # SMALL TILDE + 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT + 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA + 0x03C0: 0xB9, # GREEK SMALL LETTER PI + 0x2013: 0xD0, # EN DASH + 0x2014: 0xD1, # EM DASH + 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK + 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK + 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK + 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK + 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK + 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK + 0x2020: 0xA0, # DAGGER + 0x2021: 0xE0, # DOUBLE DAGGER + 0x2022: 0xA5, # BULLET + 0x2026: 0xC9, # HORIZONTAL ELLIPSIS + 0x2030: 0xE4, # PER MILLE SIGN + 0x2122: 0xAA, # TRADE MARK SIGN + 0x2202: 0xB6, # PARTIAL DIFFERENTIAL + 0x2206: 0xC6, # INCREMENT + 0x220F: 0xB8, # N-ARY PRODUCT + 0x2211: 0xB7, # N-ARY SUMMATION + 0x221A: 0xC3, # SQUARE ROOT + 0x221E: 0xB0, # INFINITY + 0x222B: 0xBA, # INTEGRAL + 0x2248: 0xC5, # ALMOST EQUAL TO + 0x2260: 0xAD, # NOT EQUAL TO + 0x2264: 0xB2, # LESS-THAN OR EQUAL TO + 0x2265: 0xB3, # GREATER-THAN OR 
EQUAL TO + 0x25CA: 0xD7, # LOZENGE + 0xF8A0: 0xF5, # undefined1 + 0xF8FF: 0xF0, # Apple logo +} + Index: tis_620.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/encodings/tis_620.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- tis_620.py 24 Oct 2005 12:07:49 -0000 1.3 +++ tis_620.py 24 Oct 2005 12:14:59 -0000 1.4 @@ -42,12 +42,12 @@ u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0a -> LINE FEED - u'\x0b' # 0x0b -> VERTICAL TABULATION - u'\x0c' # 0x0c -> FORM FEED - u'\r' # 0x0d -> CARRIAGE RETURN - u'\x0e' # 0x0e -> SHIFT OUT - u'\x0f' # 0x0f -> SHIFT IN + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO @@ -58,12 +58,12 @@ u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1a -> SUBSTITUTE - u'\x1b' # 0x1b -> ESCAPE - u'\x1c' # 0x1c -> FILE SEPARATOR - u'\x1d' # 0x1d -> GROUP SEPARATOR - u'\x1e' # 0x1e -> RECORD SEPARATOR - u'\x1f' # 0x1f -> UNIT SEPARATOR + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK @@ -74,12 +74,12 @@ u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2a -> ASTERISK - u'+' # 0x2b -> PLUS SIGN - u',' # 0x2c -> COMMA - u'-' # 0x2d -> HYPHEN-MINUS - u'.' 
# 0x2e -> FULL STOP - u'/' # 0x2f -> SOLIDUS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO @@ -90,12 +90,12 @@ u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE - u':' # 0x3a -> COLON - u';' # 0x3b -> SEMICOLON - u'<' # 0x3c -> LESS-THAN SIGN - u'=' # 0x3d -> EQUALS SIGN - u'>' # 0x3e -> GREATER-THAN SIGN - u'?' # 0x3f -> QUESTION MARK + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B @@ -106,12 +106,12 @@ u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4a -> LATIN CAPITAL LETTER J - u'K' # 0x4b -> LATIN CAPITAL LETTER K - u'L' # 0x4c -> LATIN CAPITAL LETTER L - u'M' # 0x4d -> LATIN CAPITAL LETTER M - u'N' # 0x4e -> LATIN CAPITAL LETTER N - u'O' # 0x4f -> LATIN CAPITAL LETTER O + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R @@ -122,12 +122,12 @@ u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5a -> LATIN CAPITAL LETTER Z - u'[' # 0x5b -> LEFT SQUARE BRACKET - u'\\' # 0x5c -> REVERSE SOLIDUS - u']' # 0x5d -> RIGHT SQUARE BRACKET - u'^' # 0x5e -> CIRCUMFLEX ACCENT - u'_' # 0x5f -> LOW LINE + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' 
# 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B @@ -138,12 +138,12 @@ u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6a -> LATIN SMALL LETTER J - u'k' # 0x6b -> LATIN SMALL LETTER K - u'l' # 0x6c -> LATIN SMALL LETTER L - u'm' # 0x6d -> LATIN SMALL LETTER M - u'n' # 0x6e -> LATIN SMALL LETTER N - u'o' # 0x6f -> LATIN SMALL LETTER O + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R @@ -154,12 +154,12 @@ u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7a -> LATIN SMALL LETTER Z - u'{' # 0x7b -> LEFT CURLY BRACKET - u'|' # 0x7c -> VERTICAL LINE - u'}' # 0x7d -> RIGHT CURLY BRACKET - u'~' # 0x7e -> TILDE - u'\x7f' # 0x7f -> DELETE + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> u'\x81' # 0x81 -> u'\x82' # 0x82 -> @@ -170,12 +170,12 @@ u'\x87' # 0x87 -> u'\x88' # 0x88 -> u'\x89' # 0x89 -> - u'\x8a' # 0x8a -> - u'\x8b' # 0x8b -> - u'\x8c' # 0x8c -> - u'\x8d' # 0x8d -> - u'\x8e' # 0x8e -> - u'\x8f' # 0x8f -> + u'\x8a' # 0x8A -> + u'\x8b' # 0x8B -> + u'\x8c' # 0x8C -> + u'\x8d' # 0x8D -> + u'\x8e' # 0x8E -> + u'\x8f' # 0x8F -> u'\x90' # 0x90 -> u'\x91' # 0x91 -> u'\x92' # 0x92 -> @@ -186,104 +186,104 @@ u'\x97' # 0x97 -> u'\x98' # 0x98 -> u'\x99' # 0x99 -> - u'\x9a' # 0x9a -> - u'\x9b' # 0x9b -> - u'\x9c' # 0x9c -> - 
u'\x9d' # 0x9d -> - u'\x9e' # 0x9e -> - u'\x9f' # 0x9f -> + u'\x9a' # 0x9A -> + u'\x9b' # 0x9B -> + u'\x9c' # 0x9C -> + u'\x9d' # 0x9D -> + u'\x9e' # 0x9E -> + u'\x9f' # 0x9F -> u'\ufffe' - u'\u0e01' # 0xa1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xa2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xa3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xa4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xa5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xa6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xa7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xa8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xa9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xaa -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xab -> THAI CHARACTER SO SO - u'\u0e0c' # 0xac -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xad -> THAI CHARACTER YO YING - u'\u0e0e' # 0xae -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xaf -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xb0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xb1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xb2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xb3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xb4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xb5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xb6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xb7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xb8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xb9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xba -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xbb -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xbc -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xbd -> THAI CHARACTER FO FA - u'\u0e1e' # 0xbe -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xbf -> THAI CHARACTER FO FAN - u'\u0e20' # 0xc0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xc1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xc2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xc3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xc4 -> THAI CHARACTER RU - u'\u0e25' # 0xc5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xc6 -> THAI CHARACTER LU - u'\u0e27' # 0xc7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xc8 -> THAI CHARACTER 
SO SALA - u'\u0e29' # 0xc9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xca -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xcb -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xcc -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xcd -> THAI CHARACTER O ANG - u'\u0e2e' # 0xce -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xcf -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xd0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xd1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xd2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xd3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xd4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xd5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xd6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xd7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xd8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xd9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xda -> THAI CHARACTER PHINTHU + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> 
THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' - u'\u0e3f' # 0xdf -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xe0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xe1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xe2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xe3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xe4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xe5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xe6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xe7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xe8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xe9 -> THAI 
CHARACTER MAI THO - u'\u0e4a' # 0xea -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xeb -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xec -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xed -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xee -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xef -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xf0 -> THAI DIGIT ZERO - u'\u0e51' # 0xf1 -> THAI DIGIT ONE - u'\u0e52' # 0xf2 -> THAI DIGIT TWO - u'\u0e53' # 0xf3 -> THAI DIGIT THREE - u'\u0e54' # 0xf4 -> THAI DIGIT FOUR - u'\u0e55' # 0xf5 -> THAI DIGIT FIVE - u'\u0e56' # 0xf6 -> THAI DIGIT SIX - u'\u0e57' # 0xf7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xf8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xf9 -> THAI DIGIT NINE - u'\u0e5a' # 0xfa -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xfb -> THAI CHARACTER KHOMUT + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU 
+ u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT u'\ufffe' u'\ufffe' u'\ufffe' @@ -303,12 +303,12 @@ 0x0007: 0x07, # BELL 0x0008: 0x08, # BACKSPACE 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000a: 0x0a, # LINE FEED - 0x000b: 0x0b, # VERTICAL TABULATION - 0x000c: 0x0c, # FORM FEED - 0x000d: 0x0d, # CARRIAGE RETURN - 0x000e: 0x0e, # SHIFT OUT - 0x000f: 0x0f, # SHIFT IN + 0x000A: 0x0A, # LINE FEED + 0x000B: 0x0B, # VERTICAL TABULATION + 0x000C: 0x0C, # FORM FEED + 0x000D: 0x0D, # CARRIAGE RETURN + 0x000E: 0x0E, # SHIFT OUT + 0x000F: 0x0F, # SHIFT IN 0x0010: 0x10, # DATA LINK ESCAPE 0x0011: 0x11, # DEVICE CONTROL ONE 0x0012: 0x12, # DEVICE CONTROL TWO @@ -319,12 +319,12 @@ 0x0017: 0x17, # END OF TRANSMISSION BLOCK 0x0018: 0x18, # CANCEL 0x0019: 0x19, # END OF MEDIUM - 0x001a: 0x1a, # SUBSTITUTE - 0x001b: 0x1b, # ESCAPE - 0x001c: 0x1c, # FILE SEPARATOR - 0x001d: 0x1d, # GROUP SEPARATOR - 0x001e: 0x1e, # RECORD SEPARATOR - 0x001f: 0x1f, # UNIT SEPARATOR + 0x001A: 0x1A, # SUBSTITUTE + 0x001B: 0x1B, # ESCAPE + 0x001C: 0x1C, # FILE SEPARATOR + 0x001D: 0x1D, # GROUP SEPARATOR + 0x001E: 0x1E, # RECORD SEPARATOR + 0x001F: 0x1F, # UNIT SEPARATOR 0x0020: 0x20, # SPACE 0x0021: 0x21, # EXCLAMATION MARK 0x0022: 0x22, # QUOTATION MARK @@ -335,12 +335,12 @@ 0x0027: 0x27, # APOSTROPHE 0x0028: 0x28, # LEFT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002a: 0x2a, # ASTERISK - 0x002b: 0x2b, # PLUS SIGN - 0x002c: 0x2c, # COMMA - 0x002d: 0x2d, # HYPHEN-MINUS - 0x002e: 0x2e, # FULL STOP - 0x002f: 0x2f, # SOLIDUS + 0x002A: 0x2A, # ASTERISK + 0x002B: 0x2B, # PLUS SIGN + 0x002C: 0x2C, # COMMA + 0x002D: 0x2D, # HYPHEN-MINUS + 0x002E: 0x2E, # FULL STOP + 0x002F: 0x2F, # SOLIDUS 0x0030: 0x30, # DIGIT ZERO 0x0031: 0x31, # DIGIT ONE 0x0032: 0x32, # DIGIT TWO @@ -351,12 +351,12 @@ 0x0037: 0x37, # DIGIT SEVEN 0x0038: 0x38, # DIGIT EIGHT 0x0039: 0x39, # DIGIT NINE - 0x003a: 0x3a, # COLON - 0x003b: 0x3b, # SEMICOLON - 0x003c: 0x3c, # LESS-THAN SIGN - 0x003d: 0x3d, # EQUALS SIGN - 0x003e: 0x3e, # 
GREATER-THAN SIGN - 0x003f: 0x3f, # QUESTION MARK + 0x003A: 0x3A, # COLON + 0x003B: 0x3B, # SEMICOLON + 0x003C: 0x3C, # LESS-THAN SIGN + 0x003D: 0x3D, # EQUALS SIGN + 0x003E: 0x3E, # GREATER-THAN SIGN + 0x003F: 0x3F, # QUESTION MARK 0x0040: 0x40, # COMMERCIAL AT 0x0041: 0x41, # LATIN CAPITAL LETTER A 0x0042: 0x42, # LATIN CAPITAL LETTER B @@ -367,12 +367,12 @@ 0x0047: 0x47, # LATIN CAPITAL LETTER G 0x0048: 0x48, # LATIN CAPITAL LETTER H 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004a: 0x4a, # LATIN CAPITAL LETTER J - 0x004b: 0x4b, # LATIN CAPITAL LETTER K - 0x004c: 0x4c, # LATIN CAPITAL LETTER L - 0x004d: 0x4d, # LATIN CAPITAL LETTER M - 0x004e: 0x4e, # LATIN CAPITAL LETTER N - 0x004f: 0x4f, # LATIN CAPITAL LETTER O + 0x004A: 0x4A, # LATIN CAPITAL LETTER J + 0x004B: 0x4B, # LATIN CAPITAL LETTER K + 0x004C: 0x4C, # LATIN CAPITAL LETTER L + 0x004D: 0x4D, # LATIN CAPITAL LETTER M + 0x004E: 0x4E, # LATIN CAPITAL LETTER N + 0x004F: 0x4F, # LATIN CAPITAL LETTER O 0x0050: 0x50, # LATIN CAPITAL LETTER P 0x0051: 0x51, # LATIN CAPITAL LETTER Q 0x0052: 0x52, # LATIN CAPITAL LETTER R @@ -383,12 +383,12 @@ 0x0057: 0x57, # LATIN CAPITAL LETTER W 0x0058: 0x58, # LATIN CAPITAL LETTER X 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005a: 0x5a, # LATIN CAPITAL LETTER Z - 0x005b: 0x5b, # LEFT SQUARE BRACKET - 0x005c: 0x5c, # REVERSE SOLIDUS - 0x005d: 0x5d, # RIGHT SQUARE BRACKET - 0x005e: 0x5e, # CIRCUMFLEX ACCENT - 0x005f: 0x5f, # LOW LINE + 0x005A: 0x5A, # LATIN CAPITAL LETTER Z + 0x005B: 0x5B, # LEFT SQUARE BRACKET + 0x005C: 0x5C, # REVERSE SOLIDUS + 0x005D: 0x5D, # RIGHT SQUARE BRACKET + 0x005E: 0x5E, # CIRCUMFLEX ACCENT + 0x005F: 0x5F, # LOW LINE 0x0060: 0x60, # GRAVE ACCENT 0x0061: 0x61, # LATIN SMALL LETTER A 0x0062: 0x62, # LATIN SMALL LETTER B @@ -399,12 +399,12 @@ 0x0067: 0x67, # LATIN SMALL LETTER G 0x0068: 0x68, # LATIN SMALL LETTER H 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006a: 0x6a, # LATIN SMALL LETTER J - 0x006b: 0x6b, # LATIN SMALL LETTER K - 0x006c: 0x6c, # LATIN 
SMALL LETTER L - 0x006d: 0x6d, # LATIN SMALL LETTER M - 0x006e: 0x6e, # LATIN SMALL LETTER N - 0x006f: 0x6f, # LATIN SMALL LETTER O + 0x006A: 0x6A, # LATIN SMALL LETTER J + 0x006B: 0x6B, # LATIN SMALL LETTER K + 0x006C: 0x6C, # LATIN SMALL LETTER L + 0x006D: 0x6D, # LATIN SMALL LETTER M + 0x006E: 0x6E, # LATIN SMALL LETTER N + 0x006F: 0x6F, # LATIN SMALL LETTER O 0x0070: 0x70, # LATIN SMALL LETTER P 0x0071: 0x71, # LATIN SMALL LETTER Q 0x0072: 0x72, # LATIN SMALL LETTER R @@ -415,12 +415,12 @@ 0x0077: 0x77, # LATIN SMALL LETTER W 0x0078: 0x78, # LATIN SMALL LETTER X 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007a: 0x7a, # LATIN SMALL LETTER Z - 0x007b: 0x7b, # LEFT CURLY BRACKET - 0x007c: 0x7c, # VERTICAL LINE - 0x007d: 0x7d, # RIGHT CURLY BRACKET - 0x007e: 0x7e, # TILDE - 0x007f: 0x7f, # DELETE + 0x007A: 0x7A, # LATIN SMALL LETTER Z + 0x007B: 0x7B, # LEFT CURLY BRACKET + 0x007C: 0x7C, # VERTICAL LINE + 0x007D: 0x7D, # RIGHT CURLY BRACKET + 0x007E: 0x7E, # TILDE + 0x007F: 0x7F, # DELETE 0x0080: 0x80, # 0x0081: 0x81, # 0x0082: 0x82, # @@ -431,12 +431,12 @@ 0x0087: 0x87, # 0x0088: 0x88, # 0x0089: 0x89, # - 0x008a: 0x8a, # - 0x008b: 0x8b, # - 0x008c: 0x8c, # - 0x008d: 0x8d, # - 0x008e: 0x8e, # - 0x008f: 0x8f, # + 0x008A: 0x8A, # + 0x008B: 0x8B, # + 0x008C: 0x8C, # + 0x008D: 0x8D, # + 0x008E: 0x8E, # + 0x008F: 0x8F, # 0x0090: 0x90, # 0x0091: 0x91, # 0x0092: 0x92, # @@ -447,97 +447,98 @@ 0x0097: 0x97, # 0x0098: 0x98, # 0x0099: 0x99, # - 0x009a: 0x9a, # - 0x009b: 0x9b, # - 0x009c: 0x9c, # - 0x009d: 0x9d, # - 0x009e: 0x9e, # - 0x009f: 0x9f, # - 0x0e01: 0xa1, # THAI CHARACTER KO KAI - 0x0e02: 0xa2, # THAI CHARACTER KHO KHAI - 0x0e03: 0xa3, # THAI CHARACTER KHO KHUAT - 0x0e04: 0xa4, # THAI CHARACTER KHO KHWAI - 0x0e05: 0xa5, # THAI CHARACTER KHO KHON - 0x0e06: 0xa6, # THAI CHARACTER KHO RAKHANG - 0x0e07: 0xa7, # THAI CHARACTER NGO NGU - 0x0e08: 0xa8, # THAI CHARACTER CHO CHAN - 0x0e09: 0xa9, # THAI CHARACTER CHO CHING - 0x0e0a: 0xaa, # THAI CHARACTER CHO CHANG - 0x0e0b: 0xab, 
# THAI CHARACTER SO SO - 0x0e0c: 0xac, # THAI CHARACTER CHO CHOE - 0x0e0d: 0xad, # THAI CHARACTER YO YING - 0x0e0e: 0xae, # THAI CHARACTER DO CHADA - 0x0e0f: 0xaf, # THAI CHARACTER TO PATAK - 0x0e10: 0xb0, # THAI CHARACTER THO THAN - 0x0e11: 0xb1, # THAI CHARACTER THO NANGMONTHO - 0x0e12: 0xb2, # THAI CHARACTER THO PHUTHAO - 0x0e13: 0xb3, # THAI CHARACTER NO NEN - 0x0e14: 0xb4, # THAI CHARACTER DO DEK - 0x0e15: 0xb5, # THAI CHARACTER TO TAO - 0x0e16: 0xb6, # THAI CHARACTER THO THUNG - 0x0e17: 0xb7, # THAI CHARACTER THO THAHAN - 0x0e18: 0xb8, # THAI CHARACTER THO THONG - 0x0e19: 0xb9, # THAI CHARACTER NO NU - 0x0e1a: 0xba, # THAI CHARACTER BO BAIMAI - 0x0e1b: 0xbb, # THAI CHARACTER PO PLA - 0x0e1c: 0xbc, # THAI CHARACTER PHO PHUNG - 0x0e1d: 0xbd, # THAI CHARACTER FO FA - 0x0e1e: 0xbe, # THAI CHARACTER PHO PHAN - 0x0e1f: 0xbf, # THAI CHARACTER FO FAN - 0x0e20: 0xc0, # THAI CHARACTER PHO SAMPHAO - 0x0e21: 0xc1, # THAI CHARACTER MO MA - 0x0e22: 0xc2, # THAI CHARACTER YO YAK - 0x0e23: 0xc3, # THAI CHARACTER RO RUA - 0x0e24: 0xc4, # THAI CHARACTER RU - 0x0e25: 0xc5, # THAI CHARACTER LO LING - 0x0e26: 0xc6, # THAI CHARACTER LU - 0x0e27: 0xc7, # THAI CHARACTER WO WAEN - 0x0e28: 0xc8, # THAI CHARACTER SO SALA - 0x0e29: 0xc9, # THAI CHARACTER SO RUSI - 0x0e2a: 0xca, # THAI CHARACTER SO SUA - 0x0e2b: 0xcb, # THAI CHARACTER HO HIP - 0x0e2c: 0xcc, # THAI CHARACTER LO CHULA - 0x0e2d: 0xcd, # THAI CHARACTER O ANG - 0x0e2e: 0xce, # THAI CHARACTER HO NOKHUK - 0x0e2f: 0xcf, # THAI CHARACTER PAIYANNOI - 0x0e30: 0xd0, # THAI CHARACTER SARA A - 0x0e31: 0xd1, # THAI CHARACTER MAI HAN-AKAT - 0x0e32: 0xd2, # THAI CHARACTER SARA AA - 0x0e33: 0xd3, # THAI CHARACTER SARA AM - 0x0e34: 0xd4, # THAI CHARACTER SARA I - 0x0e35: 0xd5, # THAI CHARACTER SARA II - 0x0e36: 0xd6, # THAI CHARACTER SARA UE - 0x0e37: 0xd7, # THAI CHARACTER SARA UEE - 0x0e38: 0xd8, # THAI CHARACTER SARA U - 0x0e39: 0xd9, # THAI CHARACTER SARA UU - 0x0e3a: 0xda, # THAI CHARACTER PHINTHU - 0x0e3f: 0xdf, # THAI CURRENCY 
SYMBOL BAHT - 0x0e40: 0xe0, # THAI CHARACTER SARA E - 0x0e41: 0xe1, # THAI CHARACTER SARA AE - 0x0e42: 0xe2, # THAI CHARACTER SARA O - 0x0e43: 0xe3, # THAI CHARACTER SARA AI MAIMUAN - 0x0e44: 0xe4, # THAI CHARACTER SARA AI MAIMALAI - 0x0e45: 0xe5, # THAI CHARACTER LAKKHANGYAO - 0x0e46: 0xe6, # THAI CHARACTER MAIYAMOK - 0x0e47: 0xe7, # THAI CHARACTER MAITAIKHU - 0x0e48: 0xe8, # THAI CHARACTER MAI EK - 0x0e49: 0xe9, # THAI CHARACTER MAI THO - 0x0e4a: 0xea, # THAI CHARACTER MAI TRI - 0x0e4b: 0xeb, # THAI CHARACTER MAI CHATTAWA - 0x0e4c: 0xec, # THAI CHARACTER THANTHAKHAT - 0x0e4d: 0xed, # THAI CHARACTER NIKHAHIT - 0x0e4e: 0xee, # THAI CHARACTER YAMAKKAN - 0x0e4f: 0xef, # THAI CHARACTER FONGMAN - 0x0e50: 0xf0, # THAI DIGIT ZERO - 0x0e51: 0xf1, # THAI DIGIT ONE - 0x0e52: 0xf2, # THAI DIGIT TWO - 0x0e53: 0xf3, # THAI DIGIT THREE - 0x0e54: 0xf4, # THAI DIGIT FOUR - 0x0e55: 0xf5, # THAI DIGIT FIVE - 0x0e56: 0xf6, # THAI DIGIT SIX - 0x0e57: 0xf7, # THAI DIGIT SEVEN - 0x0e58: 0xf8, # THAI DIGIT EIGHT - 0x0e59: 0xf9, # THAI DIGIT NINE - 0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU - 0x0e5b: 0xfb, # THAI CHARACTER KHOMUT -} \ No newline at end of file + 0x009A: 0x9A, # + 0x009B: 0x9B, # + 0x009C: 0x9C, # + 0x009D: 0x9D, # + 0x009E: 0x9E, # + 0x009F: 0x9F, # + 0x0E01: 0xA1, # THAI CHARACTER KO KAI + 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI + 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT + 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI + 0x0E05: 0xA5, # THAI CHARACTER KHO KHON + 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG + 0x0E07: 0xA7, # THAI CHARACTER NGO NGU + 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN + 0x0E09: 0xA9, # THAI CHARACTER CHO CHING + 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG + 0x0E0B: 0xAB, # THAI CHARACTER SO SO + 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE + 0x0E0D: 0xAD, # THAI CHARACTER YO YING + 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA + 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK + 0x0E10: 0xB0, # THAI CHARACTER THO THAN + 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO + 0x0E12: 0xB2, 
# THAI CHARACTER THO PHUTHAO + 0x0E13: 0xB3, # THAI CHARACTER NO NEN + 0x0E14: 0xB4, # THAI CHARACTER DO DEK + 0x0E15: 0xB5, # THAI CHARACTER TO TAO + 0x0E16: 0xB6, # THAI CHARACTER THO THUNG + 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN + 0x0E18: 0xB8, # THAI CHARACTER THO THONG + 0x0E19: 0xB9, # THAI CHARACTER NO NU + 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI + 0x0E1B: 0xBB, # THAI CHARACTER PO PLA + 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG + 0x0E1D: 0xBD, # THAI CHARACTER FO FA + 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN + 0x0E1F: 0xBF, # THAI CHARACTER FO FAN + 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO + 0x0E21: 0xC1, # THAI CHARACTER MO MA + 0x0E22: 0xC2, # THAI CHARACTER YO YAK + 0x0E23: 0xC3, # THAI CHARACTER RO RUA + 0x0E24: 0xC4, # THAI CHARACTER RU + 0x0E25: 0xC5, # THAI CHARACTER LO LING + 0x0E26: 0xC6, # THAI CHARACTER LU + 0x0E27: 0xC7, # THAI CHARACTER WO WAEN + 0x0E28: 0xC8, # THAI CHARACTER SO SALA + 0x0E29: 0xC9, # THAI CHARACTER SO RUSI + 0x0E2A: 0xCA, # THAI CHARACTER SO SUA + 0x0E2B: 0xCB, # THAI CHARACTER HO HIP + 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA + 0x0E2D: 0xCD, # THAI CHARACTER O ANG + 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK + 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI + 0x0E30: 0xD0, # THAI CHARACTER SARA A + 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT + 0x0E32: 0xD2, # THAI CHARACTER SARA AA + 0x0E33: 0xD3, # THAI CHARACTER SARA AM + 0x0E34: 0xD4, # THAI CHARACTER SARA I + 0x0E35: 0xD5, # THAI CHARACTER SARA II + 0x0E36: 0xD6, # THAI CHARACTER SARA UE + 0x0E37: 0xD7, # THAI CHARACTER SARA UEE + 0x0E38: 0xD8, # THAI CHARACTER SARA U + 0x0E39: 0xD9, # THAI CHARACTER SARA UU + 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU + 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT + 0x0E40: 0xE0, # THAI CHARACTER SARA E + 0x0E41: 0xE1, # THAI CHARACTER SARA AE + 0x0E42: 0xE2, # THAI CHARACTER SARA O + 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN + 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI + 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO + 0x0E46: 0xE6, # THAI 
CHARACTER MAIYAMOK + 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU + 0x0E48: 0xE8, # THAI CHARACTER MAI EK + 0x0E49: 0xE9, # THAI CHARACTER MAI THO + 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI + 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA + 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT + 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT + 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN + 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN + 0x0E50: 0xF0, # THAI DIGIT ZERO + 0x0E51: 0xF1, # THAI DIGIT ONE + 0x0E52: 0xF2, # THAI DIGIT TWO + 0x0E53: 0xF3, # THAI DIGIT THREE + 0x0E54: 0xF4, # THAI DIGIT FOUR + 0x0E55: 0xF5, # THAI DIGIT FIVE + 0x0E56: 0xF6, # THAI DIGIT SIX + 0x0E57: 0xF7, # THAI DIGIT SEVEN + 0x0E58: 0xF8, # THAI DIGIT EIGHT + 0x0E59: 0xF9, # THAI DIGIT NINE + 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU + 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT +} + From nascheme at users.sourceforge.net Tue Oct 25 08:30:19 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Tue, 25 Oct 2005 08:30:19 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python compile.c,2.362,2.363 Message-ID: <20051025063019.491261E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24488/Python Modified Files: compile.c Log Message: Ensure that compiler_exit_scope() is called as necessary to free memory allocated by compiler_enter_scope(). Change return type for compiler_exit_scope() to be void. 
Index: compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.362 retrieving revision 2.363 diff -u -d -r2.362 -r2.363 --- compile.c 23 Oct 2005 23:00:41 -0000 2.362 +++ compile.c 25 Oct 2005 06:30:14 -0000 2.363 @@ -1113,7 +1113,7 @@ return 1; } -static int +static void compiler_exit_scope(struct compiler *c) { int n; @@ -1126,14 +1126,14 @@ if (n >= 0) { wrapper = PyList_GET_ITEM(c->c_stack, n); c->u = (struct compiler_unit *)PyCObject_AsVoidPtr(wrapper); + /* we are deleting from a list so this really shouldn't fail */ if (PySequence_DelItem(c->c_stack, n) < 0) - return 0; + Py_FatalError("compiler_exit_scope()"); compiler_unit_check(c->u); } else c->u = NULL; - return 1; /* XXX void? */ } /* Allocate a new block and return a pointer to it. @@ -1701,8 +1701,10 @@ return NULL; switch (mod->kind) { case Module_kind: - if (!compiler_body(c, mod->v.Module.body)) + if (!compiler_body(c, mod->v.Module.body)) { + compiler_exit_scope(c); return 0; + } break; case Interactive_kind: c->c_interactive = 1; @@ -1872,8 +1874,10 @@ docstring = compiler_isdocstring(st); if (docstring) first_const = st->v.Expr.value->v.Str.s; - if (compiler_add_o(c, c->u->u_consts, first_const) < 0) + if (compiler_add_o(c, c->u->u_consts, first_const) < 0) { + compiler_exit_scope(c); return 0; + } /* unpack nested arguments */ compiler_arguments(c, args); @@ -1889,9 +1893,9 @@ VISIT(c, stmt, s2); } co = assemble(c, 1); + compiler_exit_scope(c); if (co == NULL) return 0; - compiler_exit_scope(c); compiler_make_closure(c, co, asdl_seq_LEN(args->defaults)); @@ -1923,6 +1927,7 @@ str = PyString_InternFromString("__name__"); if (!str || !compiler_nameop(c, str, Load)) { Py_XDECREF(str); + compiler_exit_scope(c); return 0; } @@ -1930,19 +1935,22 @@ str = PyString_InternFromString("__module__"); if (!str || !compiler_nameop(c, str, Store)) { Py_XDECREF(str); + compiler_exit_scope(c); return 0; } 
Py_DECREF(str); - if (!compiler_body(c, s->v.ClassDef.body)) + if (!compiler_body(c, s->v.ClassDef.body)) { + compiler_exit_scope(c); return 0; + } ADDOP(c, LOAD_LOCALS); ADDOP(c, RETURN_VALUE); co = assemble(c, 1); + compiler_exit_scope(c); if (co == NULL) return 0; - compiler_exit_scope(c); compiler_make_closure(c, co, 0); ADDOP_I(c, CALL_FUNCTION, 0); @@ -1976,9 +1984,9 @@ VISIT(c, expr, e->v.Lambda.body); ADDOP(c, RETURN_VALUE); co = assemble(c, 1); + compiler_exit_scope(c); if (co == NULL) return 0; - compiler_exit_scope(c); compiler_make_closure(c, co, asdl_seq_LEN(args->defaults)); Py_DECREF(name); @@ -3149,9 +3157,9 @@ compiler_genexp_generator(c, e->v.GeneratorExp.generators, 0, e->v.GeneratorExp.elt); co = assemble(c, 1); + compiler_exit_scope(c); if (co == NULL) return 0; - compiler_exit_scope(c); compiler_make_closure(c, co, 0); VISIT(c, expr, outermost_iter); From nascheme at users.sourceforge.net Tue Oct 25 09:54:58 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Tue, 25 Oct 2005 09:54:58 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c,2.4,2.5 Message-ID: <20051025075458.34C751E40E3@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10957/Python Modified Files: ast.c Log Message: Write a separate ast_for_testlist_gexp() function instead of overloading ast_for_testlist(). Also, write a ast_for_class_bases() function and in the process fix a memory leak. Add some assertions. 
Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ast.c,v retrieving revision 2.4 retrieving revision 2.5 diff -u -d -r2.4 -r2.5 --- ast.c 23 Oct 2005 03:38:19 -0000 2.4 +++ ast.c 25 Oct 2005 07:54:54 -0000 2.5 @@ -35,7 +35,8 @@ static stmt_ty ast_for_stmt(struct compiling *, const node *); static asdl_seq *ast_for_suite(struct compiling *, const node *); static asdl_seq *ast_for_exprlist(struct compiling *, const node *, int); -static expr_ty ast_for_testlist(struct compiling *, const node *, int); +static expr_ty ast_for_testlist(struct compiling *, const node *); +static expr_ty ast_for_testlist_gexp(struct compiling *, const node *); /* Note different signature for ast_for_call */ static expr_ty ast_for_call(struct compiling *, const node *, expr_ty); @@ -251,7 +252,7 @@ expr_ty testlist_ast; /* XXX Why not gen_for here? */ - testlist_ast = ast_for_testlist(&c, CHILD(n, 0), 0); + testlist_ast = ast_for_testlist(&c, CHILD(n, 0)); if (!testlist_ast) goto error; return Expression(testlist_ast); @@ -980,7 +981,7 @@ free_expr(elt); return NULL; } - expression = ast_for_testlist(c, CHILD(ch, 3), 0); + expression = ast_for_testlist(c, CHILD(ch, 3)); if (!expression) { asdl_seq_free(t); asdl_seq_free(listcomps); @@ -1144,7 +1145,7 @@ free_expr(elt); return NULL; } - expression = ast_for_testlist(c, CHILD(ch, 3), 1); + expression = ast_for_expr(c, CHILD(ch, 3)); if (!expression) { asdl_seq_free(genexps); free_expr(elt); @@ -1184,11 +1185,17 @@ } for (j = 0; j < n_ifs; j++) { + expr_ty expression; REQ(ch, gen_iter); ch = CHILD(ch, 0); REQ(ch, gen_if); - asdl_seq_APPEND(ifs, ast_for_expr(c, CHILD(ch, 1))); + expression = ast_for_expr(c, CHILD(ch, 1)); + if (!expression) { + asdl_seq_free(genexps); + return NULL; + } + asdl_seq_APPEND(ifs, expression); if (NCH(ch) == 3) ch = CHILD(ch, 2); } @@ -1244,7 +1251,7 @@ if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == gen_for)) return ast_for_genexp(c, ch); 
- return ast_for_testlist(c, ch, 1); + return ast_for_testlist_gexp(c, ch); case LSQB: /* list (or list comprehension) */ ch = CHILD(n, 1); @@ -1297,7 +1304,7 @@ return Dict(keys, values, LINENO(n)); } case BACKQUOTE: { /* repr */ - expr_ty expression = ast_for_testlist(c, CHILD(n, 1), 0); + expr_ty expression = ast_for_testlist(c, CHILD(n, 1)); if (!expression) return NULL; @@ -1552,7 +1559,7 @@ case yield_expr: { expr_ty exp = NULL; if (NCH(n) == 2) { - exp = ast_for_testlist(c, CHILD(n, 1), 0); + exp = ast_for_testlist(c, CHILD(n, 1)); if (!exp) return NULL; } @@ -1796,36 +1803,69 @@ return NULL; } -/* Unlike other ast_for_XXX() functions, this takes a flag that - indicates whether generator expressions are allowed. If gexp is - non-zero, check for testlist_gexp instead of plain testlist. -*/ - static expr_ty -ast_for_testlist(struct compiling *c, const node* n, int gexp) +ast_for_testlist(struct compiling *c, const node* n) { - /* testlist_gexp: test ( gen_for | (',' test)* [','] ) - testlist: test (',' test)* [','] - */ - + /* testlist_gexp: test (',' test)* [','] */ + /* testlist: test (',' test)* [','] */ + /* testlist_safe: test (',' test)+ [','] */ + /* testlist1: test (',' test)* */ assert(NCH(n) > 0); + if (TYPE(n) == testlist_gexp) { + if (NCH(n) > 1) + assert(TYPE(CHILD(n, 1)) != gen_for); + } + else { + assert(TYPE(n) == testlist || + TYPE(n) == testlist_safe || + TYPE(n) == testlist1); + } if (NCH(n) == 1) return ast_for_expr(c, CHILD(n, 0)); - if (TYPE(CHILD(n, 1)) == gen_for) { - if (!gexp) { - ast_error(n, "illegal generator expression"); - return NULL; - } - return ast_for_genexp(c, n); - } else { asdl_seq *tmp = seq_for_testlist(c, n); if (!tmp) return NULL; - return Tuple(tmp, Load, LINENO(n)); } - return NULL; /* unreachable */ +} + +static expr_ty +ast_for_testlist_gexp(struct compiling *c, const node* n) +{ + /* testlist_gexp: test ( gen_for | (',' test)* [','] ) */ + /* argument: test [ gen_for ] */ + assert(TYPE(n) == testlist_gexp || 
TYPE(n) == argument); + if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for) { + return ast_for_genexp(c, n); + } + else + return ast_for_testlist(c, n); +} + +/* like ast_for_testlist() but returns a sequence */ +static asdl_seq* +ast_for_class_bases(struct compiling *c, const node* n) +{ + /* testlist: test (',' test)* [','] */ + assert(NCH(n) > 0); + REQ(n, testlist); + if (NCH(n) == 1) { + expr_ty base; + asdl_seq *bases = asdl_seq_new(1); + if (!bases) + return NULL; + base = ast_for_expr(c, CHILD(n, 0)); + if (!base) { + asdl_seq_free(bases); + return NULL; + } + asdl_seq_SET(bases, 0, base); + return bases; + } + else { + return seq_for_testlist(c, n); + } } static stmt_ty @@ -1841,7 +1881,7 @@ */ if (NCH(n) == 1) { - expr_ty e = ast_for_testlist(c, CHILD(n, 0), 0); + expr_ty e = ast_for_testlist(c, CHILD(n, 0)); if (!e) return NULL; @@ -1853,7 +1893,7 @@ node *ch = CHILD(n, 0); if (TYPE(ch) == testlist) - expr1 = ast_for_testlist(c, ch, 0); + expr1 = ast_for_testlist(c, ch); else expr1 = Yield(ast_for_expr(c, CHILD(ch, 0)), LINENO(ch)); @@ -1874,7 +1914,7 @@ ch = CHILD(n, 2); if (TYPE(ch) == testlist) - expr2 = ast_for_testlist(c, ch, 0); + expr2 = ast_for_testlist(c, ch); else expr2 = Yield(ast_for_expr(c, ch), LINENO(ch)); if (!expr2) @@ -1904,7 +1944,7 @@ ast_error(ch, "assignment to yield expression not possible"); goto error; } - e = ast_for_testlist(c, ch, 0); + e = ast_for_testlist(c, ch); /* set context to assign */ if (!e) @@ -1919,7 +1959,7 @@ } value = CHILD(n, NCH(n) - 1); if (TYPE(value) == testlist) - expression = ast_for_testlist(c, value, 0); + expression = ast_for_testlist(c, value); else expression = ast_for_expr(c, value); if (!expression) @@ -2041,7 +2081,7 @@ if (NCH(ch) == 1) return Return(NULL, LINENO(n)); else { - expr_ty expression = ast_for_testlist(c, CHILD(ch, 1), 0); + expr_ty expression = ast_for_testlist(c, CHILD(ch, 1)); if (!expression) return NULL; return Return(expression, LINENO(n)); @@ -2599,7 +2639,7 @@ else target = 
Tuple(_target, Store, LINENO(n)); - expression = ast_for_testlist(c, CHILD(n, 3), 0); + expression = ast_for_testlist(c, CHILD(n, 3)); if (!expression) return NULL; suite_seq = ast_for_suite(c, CHILD(n, 5)); @@ -2725,7 +2765,6 @@ ast_for_classdef(struct compiling *c, const node *n) { /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */ - expr_ty _bases; asdl_seq *bases, *s; REQ(n, classdef); @@ -2750,37 +2789,13 @@ } /* else handle the base class list */ - _bases = ast_for_testlist(c, CHILD(n, 3), 0); - if (!_bases) + bases = ast_for_class_bases(c, CHILD(n, 3)); + if (!bases) return NULL; - /* XXX: I don't think we can set to diff types here, how to free??? - - Here's the allocation chain: - Tuple (Python-ast.c:907) - ast_for_testlist (ast.c:1782) - ast_for_classdef (ast.c:2677) - */ - if (_bases->kind == Tuple_kind) - bases = _bases->v.Tuple.elts; - else { - bases = asdl_seq_new(1); - if (!bases) { - free_expr(_bases); - /* XXX: free _bases */ - return NULL; - } - asdl_seq_SET(bases, 0, _bases); - } s = ast_for_suite(c, CHILD(n, 6)); if (!s) { - /* XXX: I think this free is correct, but needs to change see above */ - if (_bases->kind == Tuple_kind) - free_expr(_bases); - else { - free_expr(_bases); - asdl_seq_free(bases); - } + asdl_seq_free(bases); return NULL; } return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), bases, s, LINENO(n)); From nascheme at users.sourceforge.net Tue Oct 25 11:16:09 2005 From: nascheme at users.sourceforge.net (nascheme@users.sourceforge.net) Date: Tue, 25 Oct 2005 11:16:09 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Python ast.c,2.5,2.6 Message-ID: <20051025091609.324B51E4006@bag.python.org> Update of /cvsroot/python/python/dist/src/Python In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28113/Python Modified Files: ast.c Log Message: Refactor code for translating "power" nodes. 
Index: ast.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ast.c,v retrieving revision 2.5 retrieving revision 2.6 diff -u -d -r2.5 -r2.6 --- ast.c 25 Oct 2005 07:54:54 -0000 2.5 +++ ast.c 25 Oct 2005 09:16:05 -0000 2.6 @@ -1439,6 +1439,99 @@ return result; } +static expr_ty +ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr) +{ + /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME */ + expr_ty e; + REQ(n, trailer); + if (TYPE(CHILD(n, 0)) == LPAR) { + if (NCH(n) == 2) + e = Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n)); + else + e = ast_for_call(c, CHILD(n, 1), left_expr); + } + else if (TYPE(CHILD(n, 0)) == LSQB) { + REQ(CHILD(n, 2), RSQB); + n = CHILD(n, 1); + if (NCH(n) <= 2) { + slice_ty slc = ast_for_slice(c, CHILD(n, 0)); + if (!slc) + return NULL; + e = Subscript(left_expr, slc, Load, LINENO(n)); + if (!e) { + free_slice(slc); + return NULL; + } + } + else { + int j; + slice_ty slc; + asdl_seq *slices = asdl_seq_new((NCH(n) + 1) / 2); + if (!slices) + return NULL; + for (j = 0; j < NCH(n); j += 2) { + slc = ast_for_slice(c, CHILD(n, j)); + if (!slc) { + asdl_seq_free(slices); + return NULL; + } + asdl_seq_SET(slices, j / 2, slc); + } + e = Subscript(left_expr, ExtSlice(slices), Load, LINENO(n)); + if (!e) { + asdl_seq_free(slices); + return NULL; + } + } + } + else { + assert(TYPE(CHILD(n, 0)) == DOT); + e = Attribute(left_expr, NEW_IDENTIFIER(CHILD(n, 1)), Load, LINENO(n)); + } + return e; +} + +static expr_ty +ast_for_power(struct compiling *c, const node *n) +{ + /* power: atom trailer* ('**' factor)* + */ + int i; + expr_ty e, tmp; + REQ(n, power); + e = ast_for_atom(c, CHILD(n, 0)); + if (!e) + return NULL; + if (NCH(n) == 1) + return e; + for (i = 1; i < NCH(n); i++) { + node *ch = CHILD(n, i); + if (TYPE(ch) != trailer) + break; + tmp = ast_for_trailer(c, ch, e); + if (!tmp) { + free_expr(e); + return NULL; + } + e = tmp; + } + if 
(TYPE(CHILD(n, NCH(n) - 1)) == factor) { + expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1)); + if (!f) { + free_expr(e); + return NULL; + } + tmp = BinOp(e, Pow, f, LINENO(n)); + if (!tmp) { + free_expr(e); + return NULL; + } + e = tmp; + } + return e; +} + /* Do not name a variable 'expr'! Will cause a compile error. */ @@ -1587,97 +1680,8 @@ } break; } - case power: { - expr_ty e = ast_for_atom(c, CHILD(n, 0)); - if (!e) - return NULL; - if (NCH(n) == 1) - return e; - /* power: atom trailer* ('**' factor)* - trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME - - XXX What about atom trailer trailer ** factor? - */ - for (i = 1; i < NCH(n); i++) { - expr_ty new = e; - node *ch = CHILD(n, i); - if (ch->n_str && strcmp(ch->n_str, "**") == 0) - break; - if (TYPE(CHILD(ch, 0)) == LPAR) { - if (NCH(ch) == 2) - new = Call(new, NULL, NULL, NULL, NULL, LINENO(ch)); - else - new = ast_for_call(c, CHILD(ch, 1), new); - - if (!new) { - free_expr(e); - return NULL; - } - } - else if (TYPE(CHILD(ch, 0)) == LSQB) { - REQ(CHILD(ch, 2), RSQB); - ch = CHILD(ch, 1); - if (NCH(ch) <= 2) { - slice_ty slc = ast_for_slice(c, CHILD(ch, 0)); - if (!slc) { - free_expr(e); - return NULL; - } - - new = Subscript(e, slc, Load, LINENO(ch)); - if (!new) { - free_expr(e); - free_slice(slc); - return NULL; - } - } - else { - int j; - slice_ty slc; - asdl_seq *slices = asdl_seq_new((NCH(ch) + 1) / 2); - if (!slices) { - free_expr(e); - return NULL; - } - - for (j = 0; j < NCH(ch); j += 2) { - slc = ast_for_slice(c, CHILD(ch, j)); - if (!slc) { - free_expr(e); - asdl_seq_free(slices); - return NULL; - } - asdl_seq_SET(slices, j / 2, slc); - } - new = Subscript(e, ExtSlice(slices), Load, LINENO(ch)); - if (!new) { - free_expr(e); - asdl_seq_free(slices); - return NULL; - } - } - } - else { - assert(TYPE(CHILD(ch, 0)) == DOT); - new = Attribute(e, NEW_IDENTIFIER(CHILD(ch, 1)), Load, - LINENO(ch)); - if (!new) { - free_expr(e); - return NULL; - } - } - e = new; - } - if (TYPE(CHILD(n, 
NCH(n) - 1)) == factor) { - expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1)); - if (!f) { - free_expr(e); - return NULL; - } - return BinOp(e, Pow, f, LINENO(n)); - } - return e; - } + case power: + return ast_for_power(c, n); default: abort(); PyErr_Format(PyExc_Exception, "unhandled expr: %d", TYPE(n)); From lemburg at users.sourceforge.net Tue Oct 25 13:52:55 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Tue, 25 Oct 2005 13:52:55 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/unicode/python-mappings - New directory Message-ID: <20051025115255.166E71E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Tools/unicode/python-mappings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27364/python-mappings Log Message: Directory /cvsroot/python/python/dist/src/Tools/unicode/python-mappings added to the repository From lemburg at users.sourceforge.net Tue Oct 25 13:53:37 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Tue, 25 Oct 2005 13:53:37 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/unicode gencodec.py, 1.1, 1.2 Message-ID: <20051025115337.272731E4002@bag.python.org> Update of /cvsroot/python/python/dist/src/Tools/unicode In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27433 Modified Files: gencodec.py Log Message: Apply some cosmetic fixes to the output of the script. Only include the decoding map if no table can be generated. Index: gencodec.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/unicode/gencodec.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- gencodec.py 21 Oct 2005 13:45:17 -0000 1.1 +++ gencodec.py 25 Oct 2005 11:53:33 -0000 1.2 @@ -15,12 +15,14 @@ The tool also writes marshalled versions of the mapping tables to the same location (with .mapping extension). -Written by Marc-Andre Lemburg (mal at lemburg.com). 
Modified to generate -Unicode table maps for decoding. +Written by Marc-Andre Lemburg (mal at lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright Guido van Rossum, 2000. + +Table generation: (c) Copyright Marc-Andre Lemburg, 2005. + Licensed to PSF under a Contributor Agreement. """#" @@ -117,21 +119,22 @@ return enc2uni -def hexrepr(t): +def hexrepr(t, precision=4): if t is None: return 'None' try: len(t) except: - return '0x%04x' % t + return '0x%0*X' % (precision, t) try: - return '(' + ', '.join(map(lambda t: '0x%04x' % t, t)) + ')' + return '(' + ', '.join(['0x%0*X' % (precision, item) + for item in t]) + ')' except TypeError, why: print '* failed to convert %r: %s' % (t, why) raise -def python_mapdef_code(varname, map, comments=1): +def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)): l = [] append = l.append @@ -150,6 +153,7 @@ mappings = map.items() mappings.sort() i = 0 + key_precision, value_precision = precisions for mapkey, mapvalue in mappings: mapcomment = '' if isinstance(mapkey, tuple): @@ -164,8 +168,8 @@ # No need to include identity mappings, since these # are already set for the first 256 code points. 
continue - key = hexrepr(mapkey) - value = hexrepr(mapvalue) + key = hexrepr(mapkey, key_precision) + value = hexrepr(mapvalue, value_precision) if mapcomment and comments: append(' %s: %s,\t# %s' % (key, value, mapcomment)) else: @@ -188,7 +192,7 @@ return l -def python_tabledef_code(varname, map, comments=1): +def python_tabledef_code(varname, map, comments=1, key_precision=2): l = [] append = l.append @@ -236,7 +240,7 @@ mapchar = unichr(mapvalue) if mapcomment and comments: append(' %r\t# %s -> %s' % (mapchar, - hexrepr(key), + hexrepr(key, key_precision), mapcomment)) else: append(' %r' % mapchar) @@ -263,7 +267,8 @@ encoding_map_code = python_mapdef_code( 'encoding_map', codecs.make_encoding_map(map), - comments=comments) + comments=comments, + precisions=(4, 2)) l = [ '''\ @@ -303,22 +308,28 @@ def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) +''') + # Add decoding table or map (with preference to the table) + if not decoding_table_code: + l.append(''' ### Decoding Map ''') - l.extend(decoding_map_code) - - # Add optional decoding table - if decoding_table_code: + l.extend(decoding_map_code) + else: l.append(''' ### Decoding Table ''') l.extend(decoding_table_code) + # Add encoding map l.append(''' ### Encoding Map ''') l.extend(encoding_map_code) + + # Final new-line + l.append('\n') return '\n'.join(l) @@ -343,6 +354,8 @@ mapnames = os.listdir(dir) for mapname in mapnames: mappathname = os.path.join(dir, mapname) + if not os.path.isfile(mappathname): + continue name = os.path.split(mapname)[1] name = name.replace('-','_') name = name.split('.')[0] From lemburg at users.sourceforge.net Tue Oct 25 13:54:08 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Tue, 25 Oct 2005 13:54:08 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/unicode/python-mappings CP1140.TXT, NONE, 1.1 KOI8-U.TXT, NONE, 1.1 TIS-620.TXT, NONE, 1.1 Message-ID: <20051025115408.F3DFC1E4002@bag.python.org> Update 
of /cvsroot/python/python/dist/src/Tools/unicode/python-mappings In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27571/python-mappings Added Files: CP1140.TXT KOI8-U.TXT TIS-620.TXT Log Message: Add custom mapping files used for generating some of the charmap codecs. --- NEW FILE: CP1140.TXT --- # # Name: CP1140 # Unicode version: 3.2 # Table version: 1.0 # Table format: Format A # Date: 2005-10-25 # Authors: Marc-Andre Lemburg # # This encoding is a modified CP037 encoding (with added Euro # currency sign). # # (c) Copyright Marc-Andre Lemburg, 2005. # Licensed to PSF under a Contributor Agreement. # # Based on the file # ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT # which is: # # Copyright (c) 2002 Unicode, Inc. All Rights reserved. # # This file is provided as-is by Unicode, Inc. (The Unicode Consortium). # No claims are made as to fitness for any particular purpose. No # warranties of any kind are expressed or implied. The recipient # agrees to determine applicability of information provided. If this # file has been provided on optical media by Unicode, Inc., the sole # remedy for any claim will be exchange of defective media within 90 # days of receipt. # # Unicode, Inc. hereby grants the right to freely use the information # supplied in this file in the creation of products supporting the # Unicode Standard, and to make copies of this file in any form for # internal or external distribution as long as this notice remains # attached. 
# 0x00 0x0000 #NULL 0x01 0x0001 #START OF HEADING 0x02 0x0002 #START OF TEXT 0x03 0x0003 #END OF TEXT 0x04 0x009C #CONTROL 0x05 0x0009 #HORIZONTAL TABULATION 0x06 0x0086 #CONTROL 0x07 0x007F #DELETE 0x08 0x0097 #CONTROL 0x09 0x008D #CONTROL 0x0A 0x008E #CONTROL 0x0B 0x000B #VERTICAL TABULATION 0x0C 0x000C #FORM FEED 0x0D 0x000D #CARRIAGE RETURN 0x0E 0x000E #SHIFT OUT 0x0F 0x000F #SHIFT IN 0x10 0x0010 #DATA LINK ESCAPE 0x11 0x0011 #DEVICE CONTROL ONE 0x12 0x0012 #DEVICE CONTROL TWO 0x13 0x0013 #DEVICE CONTROL THREE 0x14 0x009D #CONTROL 0x15 0x0085 #CONTROL 0x16 0x0008 #BACKSPACE 0x17 0x0087 #CONTROL 0x18 0x0018 #CANCEL 0x19 0x0019 #END OF MEDIUM 0x1A 0x0092 #CONTROL 0x1B 0x008F #CONTROL 0x1C 0x001C #FILE SEPARATOR 0x1D 0x001D #GROUP SEPARATOR 0x1E 0x001E #RECORD SEPARATOR 0x1F 0x001F #UNIT SEPARATOR 0x20 0x0080 #CONTROL 0x21 0x0081 #CONTROL 0x22 0x0082 #CONTROL 0x23 0x0083 #CONTROL 0x24 0x0084 #CONTROL 0x25 0x000A #LINE FEED 0x26 0x0017 #END OF TRANSMISSION BLOCK 0x27 0x001B #ESCAPE 0x28 0x0088 #CONTROL 0x29 0x0089 #CONTROL 0x2A 0x008A #CONTROL 0x2B 0x008B #CONTROL 0x2C 0x008C #CONTROL 0x2D 0x0005 #ENQUIRY 0x2E 0x0006 #ACKNOWLEDGE 0x2F 0x0007 #BELL 0x30 0x0090 #CONTROL 0x31 0x0091 #CONTROL 0x32 0x0016 #SYNCHRONOUS IDLE 0x33 0x0093 #CONTROL 0x34 0x0094 #CONTROL 0x35 0x0095 #CONTROL 0x36 0x0096 #CONTROL 0x37 0x0004 #END OF TRANSMISSION 0x38 0x0098 #CONTROL 0x39 0x0099 #CONTROL 0x3A 0x009A #CONTROL 0x3B 0x009B #CONTROL 0x3C 0x0014 #DEVICE CONTROL FOUR 0x3D 0x0015 #NEGATIVE ACKNOWLEDGE 0x3E 0x009E #CONTROL 0x3F 0x001A #SUBSTITUTE 0x40 0x0020 #SPACE 0x41 0x00A0 #NO-BREAK SPACE 0x42 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX 0x43 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS 0x44 0x00E0 #LATIN SMALL LETTER A WITH GRAVE 0x45 0x00E1 #LATIN SMALL LETTER A WITH ACUTE 0x46 0x00E3 #LATIN SMALL LETTER A WITH TILDE 0x47 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE 0x48 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA 0x49 0x00F1 #LATIN SMALL LETTER N WITH TILDE 0x4A 0x00A2 #CENT SIGN 
0x4B 0x002E #FULL STOP 0x4C 0x003C #LESS-THAN SIGN 0x4D 0x0028 #LEFT PARENTHESIS 0x4E 0x002B #PLUS SIGN 0x4F 0x007C #VERTICAL LINE 0x50 0x0026 #AMPERSAND 0x51 0x00E9 #LATIN SMALL LETTER E WITH ACUTE 0x52 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX 0x53 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS 0x54 0x00E8 #LATIN SMALL LETTER E WITH GRAVE 0x55 0x00ED #LATIN SMALL LETTER I WITH ACUTE 0x56 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX 0x57 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS 0x58 0x00EC #LATIN SMALL LETTER I WITH GRAVE 0x59 0x00DF #LATIN SMALL LETTER SHARP S (GERMAN) 0x5A 0x0021 #EXCLAMATION MARK 0x5B 0x0024 #DOLLAR SIGN 0x5C 0x002A #ASTERISK 0x5D 0x0029 #RIGHT PARENTHESIS 0x5E 0x003B #SEMICOLON 0x5F 0x00AC #NOT SIGN 0x60 0x002D #HYPHEN-MINUS 0x61 0x002F #SOLIDUS 0x62 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x63 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS 0x64 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE 0x65 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE 0x66 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE 0x67 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE 0x68 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA 0x69 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE 0x6A 0x00A6 #BROKEN BAR 0x6B 0x002C #COMMA 0x6C 0x0025 #PERCENT SIGN 0x6D 0x005F #LOW LINE 0x6E 0x003E #GREATER-THAN SIGN 0x6F 0x003F #QUESTION MARK 0x70 0x00F8 #LATIN SMALL LETTER O WITH STROKE 0x71 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE 0x72 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x73 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS 0x74 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE 0x75 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE 0x76 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x77 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS 0x78 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE 0x79 0x0060 #GRAVE ACCENT 0x7A 0x003A #COLON 0x7B 0x0023 #NUMBER SIGN 0x7C 0x0040 #COMMERCIAL AT 0x7D 0x0027 #APOSTROPHE 0x7E 0x003D #EQUALS SIGN 0x7F 0x0022 #QUOTATION MARK 0x80 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE 0x81 0x0061 
#LATIN SMALL LETTER A 0x82 0x0062 #LATIN SMALL LETTER B 0x83 0x0063 #LATIN SMALL LETTER C 0x84 0x0064 #LATIN SMALL LETTER D 0x85 0x0065 #LATIN SMALL LETTER E 0x86 0x0066 #LATIN SMALL LETTER F 0x87 0x0067 #LATIN SMALL LETTER G 0x88 0x0068 #LATIN SMALL LETTER H 0x89 0x0069 #LATIN SMALL LETTER I 0x8A 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x8B 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x8C 0x00F0 #LATIN SMALL LETTER ETH (ICELANDIC) 0x8D 0x00FD #LATIN SMALL LETTER Y WITH ACUTE 0x8E 0x00FE #LATIN SMALL LETTER THORN (ICELANDIC) 0x8F 0x00B1 #PLUS-MINUS SIGN 0x90 0x00B0 #DEGREE SIGN 0x91 0x006A #LATIN SMALL LETTER J 0x92 0x006B #LATIN SMALL LETTER K 0x93 0x006C #LATIN SMALL LETTER L 0x94 0x006D #LATIN SMALL LETTER M 0x95 0x006E #LATIN SMALL LETTER N 0x96 0x006F #LATIN SMALL LETTER O 0x97 0x0070 #LATIN SMALL LETTER P 0x98 0x0071 #LATIN SMALL LETTER Q 0x99 0x0072 #LATIN SMALL LETTER R 0x9A 0x00AA #FEMININE ORDINAL INDICATOR 0x9B 0x00BA #MASCULINE ORDINAL INDICATOR 0x9C 0x00E6 #LATIN SMALL LIGATURE AE 0x9D 0x00B8 #CEDILLA 0x9E 0x00C6 #LATIN CAPITAL LIGATURE AE #0x9F 0x00A4 #CURRENCY SIGN 0x9F 0x20AC # EURO SIGN 0xA0 0x00B5 #MICRO SIGN 0xA1 0x007E #TILDE 0xA2 0x0073 #LATIN SMALL LETTER S 0xA3 0x0074 #LATIN SMALL LETTER T 0xA4 0x0075 #LATIN SMALL LETTER U 0xA5 0x0076 #LATIN SMALL LETTER V 0xA6 0x0077 #LATIN SMALL LETTER W 0xA7 0x0078 #LATIN SMALL LETTER X 0xA8 0x0079 #LATIN SMALL LETTER Y 0xA9 0x007A #LATIN SMALL LETTER Z 0xAA 0x00A1 #INVERTED EXCLAMATION MARK 0xAB 0x00BF #INVERTED QUESTION MARK 0xAC 0x00D0 #LATIN CAPITAL LETTER ETH (ICELANDIC) 0xAD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE 0xAE 0x00DE #LATIN CAPITAL LETTER THORN (ICELANDIC) 0xAF 0x00AE #REGISTERED SIGN 0xB0 0x005E #CIRCUMFLEX ACCENT 0xB1 0x00A3 #POUND SIGN 0xB2 0x00A5 #YEN SIGN 0xB3 0x00B7 #MIDDLE DOT 0xB4 0x00A9 #COPYRIGHT SIGN 0xB5 0x00A7 #SECTION SIGN 0xB6 0x00B6 #PILCROW SIGN 0xB7 0x00BC #VULGAR FRACTION ONE QUARTER 0xB8 0x00BD #VULGAR FRACTION ONE HALF 0xB9 0x00BE #VULGAR FRACTION 
THREE QUARTERS 0xBA 0x005B #LEFT SQUARE BRACKET 0xBB 0x005D #RIGHT SQUARE BRACKET 0xBC 0x00AF #MACRON 0xBD 0x00A8 #DIAERESIS 0xBE 0x00B4 #ACUTE ACCENT 0xBF 0x00D7 #MULTIPLICATION SIGN 0xC0 0x007B #LEFT CURLY BRACKET 0xC1 0x0041 #LATIN CAPITAL LETTER A 0xC2 0x0042 #LATIN CAPITAL LETTER B 0xC3 0x0043 #LATIN CAPITAL LETTER C 0xC4 0x0044 #LATIN CAPITAL LETTER D 0xC5 0x0045 #LATIN CAPITAL LETTER E 0xC6 0x0046 #LATIN CAPITAL LETTER F 0xC7 0x0047 #LATIN CAPITAL LETTER G 0xC8 0x0048 #LATIN CAPITAL LETTER H 0xC9 0x0049 #LATIN CAPITAL LETTER I 0xCA 0x00AD #SOFT HYPHEN 0xCB 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX 0xCC 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS 0xCD 0x00F2 #LATIN SMALL LETTER O WITH GRAVE 0xCE 0x00F3 #LATIN SMALL LETTER O WITH ACUTE 0xCF 0x00F5 #LATIN SMALL LETTER O WITH TILDE 0xD0 0x007D #RIGHT CURLY BRACKET 0xD1 0x004A #LATIN CAPITAL LETTER J 0xD2 0x004B #LATIN CAPITAL LETTER K 0xD3 0x004C #LATIN CAPITAL LETTER L 0xD4 0x004D #LATIN CAPITAL LETTER M 0xD5 0x004E #LATIN CAPITAL LETTER N 0xD6 0x004F #LATIN CAPITAL LETTER O 0xD7 0x0050 #LATIN CAPITAL LETTER P 0xD8 0x0051 #LATIN CAPITAL LETTER Q 0xD9 0x0052 #LATIN CAPITAL LETTER R 0xDA 0x00B9 #SUPERSCRIPT ONE 0xDB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX 0xDC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS 0xDD 0x00F9 #LATIN SMALL LETTER U WITH GRAVE 0xDE 0x00FA #LATIN SMALL LETTER U WITH ACUTE 0xDF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS 0xE0 0x005C #REVERSE SOLIDUS 0xE1 0x00F7 #DIVISION SIGN 0xE2 0x0053 #LATIN CAPITAL LETTER S 0xE3 0x0054 #LATIN CAPITAL LETTER T 0xE4 0x0055 #LATIN CAPITAL LETTER U 0xE5 0x0056 #LATIN CAPITAL LETTER V 0xE6 0x0057 #LATIN CAPITAL LETTER W 0xE7 0x0058 #LATIN CAPITAL LETTER X 0xE8 0x0059 #LATIN CAPITAL LETTER Y 0xE9 0x005A #LATIN CAPITAL LETTER Z 0xEA 0x00B2 #SUPERSCRIPT TWO 0xEB 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0xEC 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS 0xED 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE 0xEE 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE 0xEF 
0x00D5 #LATIN CAPITAL LETTER O WITH TILDE 0xF0 0x0030 #DIGIT ZERO 0xF1 0x0031 #DIGIT ONE 0xF2 0x0032 #DIGIT TWO 0xF3 0x0033 #DIGIT THREE 0xF4 0x0034 #DIGIT FOUR 0xF5 0x0035 #DIGIT FIVE 0xF6 0x0036 #DIGIT SIX 0xF7 0x0037 #DIGIT SEVEN 0xF8 0x0038 #DIGIT EIGHT 0xF9 0x0039 #DIGIT NINE 0xFA 0x00B3 #SUPERSCRIPT THREE 0xFB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0xFC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS 0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE 0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE 0xFF 0x009F #CONTROL --- NEW FILE: KOI8-U.TXT --- # # Name: KOI8-U (RFC2319) to Unicode # Unicode version: 3.2 # Table version: 1.0 # Table format: Format A # Date: 2005-10-25 # Authors: Marc-Andre Lemburg # # See RFC2319 for details. This encoding is a modified KOI8-R # encoding. # # (c) Copyright Marc-Andre Lemburg, 2005. # Licensed to PSF under a Contributor Agreement. # # Based on the file # ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT # which is: # # Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. # # This file is provided as-is by Unicode, Inc. (The Unicode Consortium). # No claims are made as to fitness for any particular purpose. No # warranties of any kind are expressed or implied. The recipient # agrees to determine applicability of information provided. If this # file has been provided on optical media by Unicode, Inc., the sole # remedy for any claim will be exchange of defective media within 90 # days of receipt. # # Unicode, Inc. hereby grants the right to freely use the information # supplied in this file in the creation of products supporting the # Unicode Standard, and to make copies of this file in any form for # internal or external distribution as long as this notice remains # attached. 
# 0x00 0x0000 # NULL 0x01 0x0001 # START OF HEADING 0x02 0x0002 # START OF TEXT 0x03 0x0003 # END OF TEXT 0x04 0x0004 # END OF TRANSMISSION 0x05 0x0005 # ENQUIRY 0x06 0x0006 # ACKNOWLEDGE 0x07 0x0007 # BELL 0x08 0x0008 # BACKSPACE 0x09 0x0009 # HORIZONTAL TABULATION 0x0A 0x000A # LINE FEED 0x0B 0x000B # VERTICAL TABULATION 0x0C 0x000C # FORM FEED 0x0D 0x000D # CARRIAGE RETURN 0x0E 0x000E # SHIFT OUT 0x0F 0x000F # SHIFT IN 0x10 0x0010 # DATA LINK ESCAPE 0x11 0x0011 # DEVICE CONTROL ONE 0x12 0x0012 # DEVICE CONTROL TWO 0x13 0x0013 # DEVICE CONTROL THREE 0x14 0x0014 # DEVICE CONTROL FOUR 0x15 0x0015 # NEGATIVE ACKNOWLEDGE 0x16 0x0016 # SYNCHRONOUS IDLE 0x17 0x0017 # END OF TRANSMISSION BLOCK 0x18 0x0018 # CANCEL 0x19 0x0019 # END OF MEDIUM 0x1A 0x001A # SUBSTITUTE 0x1B 0x001B # ESCAPE 0x1C 0x001C # FILE SEPARATOR 0x1D 0x001D # GROUP SEPARATOR 0x1E 0x001E # RECORD SEPARATOR 0x1F 0x001F # UNIT SEPARATOR 0x20 0x0020 # SPACE 0x21 0x0021 # EXCLAMATION MARK 0x22 0x0022 # QUOTATION MARK 0x23 0x0023 # NUMBER SIGN 0x24 0x0024 # DOLLAR SIGN 0x25 0x0025 # PERCENT SIGN 0x26 0x0026 # AMPERSAND 0x27 0x0027 # APOSTROPHE 0x28 0x0028 # LEFT PARENTHESIS 0x29 0x0029 # RIGHT PARENTHESIS 0x2A 0x002A # ASTERISK 0x2B 0x002B # PLUS SIGN 0x2C 0x002C # COMMA 0x2D 0x002D # HYPHEN-MINUS 0x2E 0x002E # FULL STOP 0x2F 0x002F # SOLIDUS 0x30 0x0030 # DIGIT ZERO 0x31 0x0031 # DIGIT ONE 0x32 0x0032 # DIGIT TWO 0x33 0x0033 # DIGIT THREE 0x34 0x0034 # DIGIT FOUR 0x35 0x0035 # DIGIT FIVE 0x36 0x0036 # DIGIT SIX 0x37 0x0037 # DIGIT SEVEN 0x38 0x0038 # DIGIT EIGHT 0x39 0x0039 # DIGIT NINE 0x3A 0x003A # COLON 0x3B 0x003B # SEMICOLON 0x3C 0x003C # LESS-THAN SIGN 0x3D 0x003D # EQUALS SIGN 0x3E 0x003E # GREATER-THAN SIGN 0x3F 0x003F # QUESTION MARK 0x40 0x0040 # COMMERCIAL AT 0x41 0x0041 # LATIN CAPITAL LETTER A 0x42 0x0042 # LATIN CAPITAL LETTER B 0x43 0x0043 # LATIN CAPITAL LETTER C 0x44 0x0044 # LATIN CAPITAL LETTER D 0x45 0x0045 # LATIN CAPITAL LETTER E 0x46 0x0046 # LATIN CAPITAL LETTER F 0x47 0x0047 # 
LATIN CAPITAL LETTER G 0x48 0x0048 # LATIN CAPITAL LETTER H 0x49 0x0049 # LATIN CAPITAL LETTER I 0x4A 0x004A # LATIN CAPITAL LETTER J 0x4B 0x004B # LATIN CAPITAL LETTER K 0x4C 0x004C # LATIN CAPITAL LETTER L 0x4D 0x004D # LATIN CAPITAL LETTER M 0x4E 0x004E # LATIN CAPITAL LETTER N 0x4F 0x004F # LATIN CAPITAL LETTER O 0x50 0x0050 # LATIN CAPITAL LETTER P 0x51 0x0051 # LATIN CAPITAL LETTER Q 0x52 0x0052 # LATIN CAPITAL LETTER R 0x53 0x0053 # LATIN CAPITAL LETTER S 0x54 0x0054 # LATIN CAPITAL LETTER T 0x55 0x0055 # LATIN CAPITAL LETTER U 0x56 0x0056 # LATIN CAPITAL LETTER V 0x57 0x0057 # LATIN CAPITAL LETTER W 0x58 0x0058 # LATIN CAPITAL LETTER X 0x59 0x0059 # LATIN CAPITAL LETTER Y 0x5A 0x005A # LATIN CAPITAL LETTER Z 0x5B 0x005B # LEFT SQUARE BRACKET 0x5C 0x005C # REVERSE SOLIDUS 0x5D 0x005D # RIGHT SQUARE BRACKET 0x5E 0x005E # CIRCUMFLEX ACCENT 0x5F 0x005F # LOW LINE 0x60 0x0060 # GRAVE ACCENT 0x61 0x0061 # LATIN SMALL LETTER A 0x62 0x0062 # LATIN SMALL LETTER B 0x63 0x0063 # LATIN SMALL LETTER C 0x64 0x0064 # LATIN SMALL LETTER D 0x65 0x0065 # LATIN SMALL LETTER E 0x66 0x0066 # LATIN SMALL LETTER F 0x67 0x0067 # LATIN SMALL LETTER G 0x68 0x0068 # LATIN SMALL LETTER H 0x69 0x0069 # LATIN SMALL LETTER I 0x6A 0x006A # LATIN SMALL LETTER J 0x6B 0x006B # LATIN SMALL LETTER K 0x6C 0x006C # LATIN SMALL LETTER L 0x6D 0x006D # LATIN SMALL LETTER M 0x6E 0x006E # LATIN SMALL LETTER N 0x6F 0x006F # LATIN SMALL LETTER O 0x70 0x0070 # LATIN SMALL LETTER P 0x71 0x0071 # LATIN SMALL LETTER Q 0x72 0x0072 # LATIN SMALL LETTER R 0x73 0x0073 # LATIN SMALL LETTER S 0x74 0x0074 # LATIN SMALL LETTER T 0x75 0x0075 # LATIN SMALL LETTER U 0x76 0x0076 # LATIN SMALL LETTER V 0x77 0x0077 # LATIN SMALL LETTER W 0x78 0x0078 # LATIN SMALL LETTER X 0x79 0x0079 # LATIN SMALL LETTER Y 0x7A 0x007A # LATIN SMALL LETTER Z 0x7B 0x007B # LEFT CURLY BRACKET 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE 0x7F 0x007F # DELETE 0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL 
0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL 0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT 0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT 0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT 0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x8B 0x2580 # UPPER HALF BLOCK 0x8C 0x2584 # LOWER HALF BLOCK 0x8D 0x2588 # FULL BLOCK 0x8E 0x258C # LEFT HALF BLOCK 0x8F 0x2590 # RIGHT HALF BLOCK 0x90 0x2591 # LIGHT SHADE 0x91 0x2592 # MEDIUM SHADE 0x92 0x2593 # DARK SHADE 0x93 0x2320 # TOP HALF INTEGRAL 0x94 0x25A0 # BLACK SQUARE 0x95 0x2219 # BULLET OPERATOR 0x96 0x221A # SQUARE ROOT 0x97 0x2248 # ALMOST EQUAL TO 0x98 0x2264 # LESS-THAN OR EQUAL TO 0x99 0x2265 # GREATER-THAN OR EQUAL TO 0x9A 0x00A0 # NO-BREAK SPACE 0x9B 0x2321 # BOTTOM HALF INTEGRAL 0x9C 0x00B0 # DEGREE SIGN 0x9D 0x00B2 # SUPERSCRIPT TWO 0x9E 0x00B7 # MIDDLE DOT 0x9F 0x00F7 # DIVISION SIGN 0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL 0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL 0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0xA3 0x0451 # CYRILLIC SMALL LETTER IO #0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0xA4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE 0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT #0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0xA6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I #0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0xA7 0x0457 # CYRILLIC SMALL LETTER YI (UKRAINIAN) 0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT 0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT 0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE #0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0xAD 0x0491 # 
CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN 0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT 0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO #0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0xB4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT #0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0xB6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I #0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0xB7 0x0407 # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) 0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE #0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0xBD 0x0490 # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN 0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0xBF 0x00A9 # COPYRIGHT SIGN 0xC0 0x044E # CYRILLIC SMALL LETTER YU 0xC1 0x0430 # CYRILLIC SMALL LETTER A 0xC2 0x0431 # CYRILLIC SMALL LETTER BE 0xC3 0x0446 # CYRILLIC SMALL LETTER TSE 0xC4 0x0434 # CYRILLIC SMALL LETTER DE 0xC5 0x0435 # CYRILLIC SMALL LETTER IE 0xC6 0x0444 # CYRILLIC SMALL LETTER EF 0xC7 0x0433 # CYRILLIC SMALL LETTER GHE 0xC8 0x0445 # CYRILLIC SMALL LETTER HA 0xC9 0x0438 # CYRILLIC SMALL LETTER I 0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I 0xCB 0x043A # CYRILLIC SMALL LETTER KA 0xCC 0x043B # CYRILLIC SMALL LETTER EL 0xCD 0x043C # CYRILLIC SMALL LETTER EM 0xCE 0x043D # CYRILLIC SMALL LETTER EN 0xCF 0x043E # CYRILLIC SMALL LETTER O 0xD0 0x043F # CYRILLIC SMALL LETTER PE 0xD1 0x044F # CYRILLIC SMALL LETTER YA 0xD2 0x0440 # 
CYRILLIC SMALL LETTER ER 0xD3 0x0441 # CYRILLIC SMALL LETTER ES 0xD4 0x0442 # CYRILLIC SMALL LETTER TE 0xD5 0x0443 # CYRILLIC SMALL LETTER U 0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE 0xD7 0x0432 # CYRILLIC SMALL LETTER VE 0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN 0xD9 0x044B # CYRILLIC SMALL LETTER YERU 0xDA 0x0437 # CYRILLIC SMALL LETTER ZE 0xDB 0x0448 # CYRILLIC SMALL LETTER SHA 0xDC 0x044D # CYRILLIC SMALL LETTER E 0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA 0xDE 0x0447 # CYRILLIC SMALL LETTER CHE 0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN 0xE0 0x042E # CYRILLIC CAPITAL LETTER YU 0xE1 0x0410 # CYRILLIC CAPITAL LETTER A 0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE 0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE 0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE 0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE 0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF 0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE 0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA 0xE9 0x0418 # CYRILLIC CAPITAL LETTER I 0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I 0xEB 0x041A # CYRILLIC CAPITAL LETTER KA 0xEC 0x041B # CYRILLIC CAPITAL LETTER EL 0xED 0x041C # CYRILLIC CAPITAL LETTER EM 0xEE 0x041D # CYRILLIC CAPITAL LETTER EN 0xEF 0x041E # CYRILLIC CAPITAL LETTER O 0xF0 0x041F # CYRILLIC CAPITAL LETTER PE 0xF1 0x042F # CYRILLIC CAPITAL LETTER YA 0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER 0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES 0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE 0xF5 0x0423 # CYRILLIC CAPITAL LETTER U 0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE 0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE 0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN 0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU 0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE 0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA 0xFC 0x042D # CYRILLIC CAPITAL LETTER E 0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA 0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE 0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN --- NEW FILE: TIS-620.TXT --- # # Name: TIS-620 # Unicode version: 3.2 # Table version: 1.0 # Table format: 
Format A # Date: 2005-10-25 # Authors: Marc-Andre Lemburg # # According to # ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT the # TIS-620 is identical to ISO_8859-11 with the 0xA0 # (no-break space) mapping removed. # # (c) Copyright Marc-Andre Lemburg, 2005. # Licensed to PSF under a Contributor Agreement. # # Based on the file # ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT # which is: # # Copyright (c) 2002 Unicode, Inc. All Rights reserved. # # This file is provided as-is by Unicode, Inc. (The Unicode Consortium). # No claims are made as to fitness for any particular purpose. No # warranties of any kind are expressed or implied. The recipient # agrees to determine applicability of information provided. If this # file has been provided on optical media by Unicode, Inc., the sole # remedy for any claim will be exchange of defective media within 90 # days of receipt. # # Unicode, Inc. hereby grants the right to freely use the information # supplied in this file in the creation of products supporting the # Unicode Standard, and to make copies of this file in any form for # internal or external distribution as long as this notice remains # attached. 
# 0x00 0x0000 # NULL 0x01 0x0001 # START OF HEADING 0x02 0x0002 # START OF TEXT 0x03 0x0003 # END OF TEXT 0x04 0x0004 # END OF TRANSMISSION 0x05 0x0005 # ENQUIRY 0x06 0x0006 # ACKNOWLEDGE 0x07 0x0007 # BELL 0x08 0x0008 # BACKSPACE 0x09 0x0009 # HORIZONTAL TABULATION 0x0A 0x000A # LINE FEED 0x0B 0x000B # VERTICAL TABULATION 0x0C 0x000C # FORM FEED 0x0D 0x000D # CARRIAGE RETURN 0x0E 0x000E # SHIFT OUT 0x0F 0x000F # SHIFT IN 0x10 0x0010 # DATA LINK ESCAPE 0x11 0x0011 # DEVICE CONTROL ONE 0x12 0x0012 # DEVICE CONTROL TWO 0x13 0x0013 # DEVICE CONTROL THREE 0x14 0x0014 # DEVICE CONTROL FOUR 0x15 0x0015 # NEGATIVE ACKNOWLEDGE 0x16 0x0016 # SYNCHRONOUS IDLE 0x17 0x0017 # END OF TRANSMISSION BLOCK 0x18 0x0018 # CANCEL 0x19 0x0019 # END OF MEDIUM 0x1A 0x001A # SUBSTITUTE 0x1B 0x001B # ESCAPE 0x1C 0x001C # FILE SEPARATOR 0x1D 0x001D # GROUP SEPARATOR 0x1E 0x001E # RECORD SEPARATOR 0x1F 0x001F # UNIT SEPARATOR 0x20 0x0020 # SPACE 0x21 0x0021 # EXCLAMATION MARK 0x22 0x0022 # QUOTATION MARK 0x23 0x0023 # NUMBER SIGN 0x24 0x0024 # DOLLAR SIGN 0x25 0x0025 # PERCENT SIGN 0x26 0x0026 # AMPERSAND 0x27 0x0027 # APOSTROPHE 0x28 0x0028 # LEFT PARENTHESIS 0x29 0x0029 # RIGHT PARENTHESIS 0x2A 0x002A # ASTERISK 0x2B 0x002B # PLUS SIGN 0x2C 0x002C # COMMA 0x2D 0x002D # HYPHEN-MINUS 0x2E 0x002E # FULL STOP 0x2F 0x002F # SOLIDUS 0x30 0x0030 # DIGIT ZERO 0x31 0x0031 # DIGIT ONE 0x32 0x0032 # DIGIT TWO 0x33 0x0033 # DIGIT THREE 0x34 0x0034 # DIGIT FOUR 0x35 0x0035 # DIGIT FIVE 0x36 0x0036 # DIGIT SIX 0x37 0x0037 # DIGIT SEVEN 0x38 0x0038 # DIGIT EIGHT 0x39 0x0039 # DIGIT NINE 0x3A 0x003A # COLON 0x3B 0x003B # SEMICOLON 0x3C 0x003C # LESS-THAN SIGN 0x3D 0x003D # EQUALS SIGN 0x3E 0x003E # GREATER-THAN SIGN 0x3F 0x003F # QUESTION MARK 0x40 0x0040 # COMMERCIAL AT 0x41 0x0041 # LATIN CAPITAL LETTER A 0x42 0x0042 # LATIN CAPITAL LETTER B 0x43 0x0043 # LATIN CAPITAL LETTER C 0x44 0x0044 # LATIN CAPITAL LETTER D 0x45 0x0045 # LATIN CAPITAL LETTER E 0x46 0x0046 # LATIN CAPITAL LETTER F 0x47 0x0047 # 
LATIN CAPITAL LETTER G 0x48 0x0048 # LATIN CAPITAL LETTER H 0x49 0x0049 # LATIN CAPITAL LETTER I 0x4A 0x004A # LATIN CAPITAL LETTER J 0x4B 0x004B # LATIN CAPITAL LETTER K 0x4C 0x004C # LATIN CAPITAL LETTER L 0x4D 0x004D # LATIN CAPITAL LETTER M 0x4E 0x004E # LATIN CAPITAL LETTER N 0x4F 0x004F # LATIN CAPITAL LETTER O 0x50 0x0050 # LATIN CAPITAL LETTER P 0x51 0x0051 # LATIN CAPITAL LETTER Q 0x52 0x0052 # LATIN CAPITAL LETTER R 0x53 0x0053 # LATIN CAPITAL LETTER S 0x54 0x0054 # LATIN CAPITAL LETTER T 0x55 0x0055 # LATIN CAPITAL LETTER U 0x56 0x0056 # LATIN CAPITAL LETTER V 0x57 0x0057 # LATIN CAPITAL LETTER W 0x58 0x0058 # LATIN CAPITAL LETTER X 0x59 0x0059 # LATIN CAPITAL LETTER Y 0x5A 0x005A # LATIN CAPITAL LETTER Z 0x5B 0x005B # LEFT SQUARE BRACKET 0x5C 0x005C # REVERSE SOLIDUS 0x5D 0x005D # RIGHT SQUARE BRACKET 0x5E 0x005E # CIRCUMFLEX ACCENT 0x5F 0x005F # LOW LINE 0x60 0x0060 # GRAVE ACCENT 0x61 0x0061 # LATIN SMALL LETTER A 0x62 0x0062 # LATIN SMALL LETTER B 0x63 0x0063 # LATIN SMALL LETTER C 0x64 0x0064 # LATIN SMALL LETTER D 0x65 0x0065 # LATIN SMALL LETTER E 0x66 0x0066 # LATIN SMALL LETTER F 0x67 0x0067 # LATIN SMALL LETTER G 0x68 0x0068 # LATIN SMALL LETTER H 0x69 0x0069 # LATIN SMALL LETTER I 0x6A 0x006A # LATIN SMALL LETTER J 0x6B 0x006B # LATIN SMALL LETTER K 0x6C 0x006C # LATIN SMALL LETTER L 0x6D 0x006D # LATIN SMALL LETTER M 0x6E 0x006E # LATIN SMALL LETTER N 0x6F 0x006F # LATIN SMALL LETTER O 0x70 0x0070 # LATIN SMALL LETTER P 0x71 0x0071 # LATIN SMALL LETTER Q 0x72 0x0072 # LATIN SMALL LETTER R 0x73 0x0073 # LATIN SMALL LETTER S 0x74 0x0074 # LATIN SMALL LETTER T 0x75 0x0075 # LATIN SMALL LETTER U 0x76 0x0076 # LATIN SMALL LETTER V 0x77 0x0077 # LATIN SMALL LETTER W 0x78 0x0078 # LATIN SMALL LETTER X 0x79 0x0079 # LATIN SMALL LETTER Y 0x7A 0x007A # LATIN SMALL LETTER Z 0x7B 0x007B # LEFT CURLY BRACKET 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE 0x7F 0x007F # DELETE 0x80 0x0080 # 0x81 0x0081 # 0x82 0x0082 # 0x83 
0x0083 # 0x84 0x0084 # 0x85 0x0085 # 0x86 0x0086 # 0x87 0x0087 # 0x88 0x0088 # 0x89 0x0089 # 0x8A 0x008A # 0x8B 0x008B # 0x8C 0x008C # 0x8D 0x008D # 0x8E 0x008E # 0x8F 0x008F # 0x90 0x0090 # 0x91 0x0091 # 0x92 0x0092 # 0x93 0x0093 # 0x94 0x0094 # 0x95 0x0095 # 0x96 0x0096 # 0x97 0x0097 # 0x98 0x0098 # 0x99 0x0099 # 0x9A 0x009A # 0x9B 0x009B # 0x9C 0x009C # 0x9D 0x009D # 0x9E 0x009E # 0x9F 0x009F # #0xA0 0x00A0 # NO-BREAK SPACE 0xA1 0x0E01 # THAI CHARACTER KO KAI 0xA2 0x0E02 # THAI CHARACTER KHO KHAI 0xA3 0x0E03 # THAI CHARACTER KHO KHUAT 0xA4 0x0E04 # THAI CHARACTER KHO KHWAI 0xA5 0x0E05 # THAI CHARACTER KHO KHON 0xA6 0x0E06 # THAI CHARACTER KHO RAKHANG 0xA7 0x0E07 # THAI CHARACTER NGO NGU 0xA8 0x0E08 # THAI CHARACTER CHO CHAN 0xA9 0x0E09 # THAI CHARACTER CHO CHING 0xAA 0x0E0A # THAI CHARACTER CHO CHANG 0xAB 0x0E0B # THAI CHARACTER SO SO 0xAC 0x0E0C # THAI CHARACTER CHO CHOE 0xAD 0x0E0D # THAI CHARACTER YO YING 0xAE 0x0E0E # THAI CHARACTER DO CHADA 0xAF 0x0E0F # THAI CHARACTER TO PATAK 0xB0 0x0E10 # THAI CHARACTER THO THAN 0xB1 0x0E11 # THAI CHARACTER THO NANGMONTHO 0xB2 0x0E12 # THAI CHARACTER THO PHUTHAO 0xB3 0x0E13 # THAI CHARACTER NO NEN 0xB4 0x0E14 # THAI CHARACTER DO DEK 0xB5 0x0E15 # THAI CHARACTER TO TAO 0xB6 0x0E16 # THAI CHARACTER THO THUNG 0xB7 0x0E17 # THAI CHARACTER THO THAHAN 0xB8 0x0E18 # THAI CHARACTER THO THONG 0xB9 0x0E19 # THAI CHARACTER NO NU 0xBA 0x0E1A # THAI CHARACTER BO BAIMAI 0xBB 0x0E1B # THAI CHARACTER PO PLA 0xBC 0x0E1C # THAI CHARACTER PHO PHUNG 0xBD 0x0E1D # THAI CHARACTER FO FA 0xBE 0x0E1E # THAI CHARACTER PHO PHAN 0xBF 0x0E1F # THAI CHARACTER FO FAN 0xC0 0x0E20 # THAI CHARACTER PHO SAMPHAO 0xC1 0x0E21 # THAI CHARACTER MO MA 0xC2 0x0E22 # THAI CHARACTER YO YAK 0xC3 0x0E23 # THAI CHARACTER RO RUA 0xC4 0x0E24 # THAI CHARACTER RU 0xC5 0x0E25 # THAI CHARACTER LO LING 0xC6 0x0E26 # THAI CHARACTER LU 0xC7 0x0E27 # THAI CHARACTER WO WAEN 0xC8 0x0E28 # THAI CHARACTER SO SALA 0xC9 0x0E29 # THAI CHARACTER SO RUSI 0xCA 0x0E2A # THAI CHARACTER SO 
SUA 0xCB 0x0E2B # THAI CHARACTER HO HIP 0xCC 0x0E2C # THAI CHARACTER LO CHULA 0xCD 0x0E2D # THAI CHARACTER O ANG 0xCE 0x0E2E # THAI CHARACTER HO NOKHUK 0xCF 0x0E2F # THAI CHARACTER PAIYANNOI 0xD0 0x0E30 # THAI CHARACTER SARA A 0xD1 0x0E31 # THAI CHARACTER MAI HAN-AKAT 0xD2 0x0E32 # THAI CHARACTER SARA AA 0xD3 0x0E33 # THAI CHARACTER SARA AM 0xD4 0x0E34 # THAI CHARACTER SARA I 0xD5 0x0E35 # THAI CHARACTER SARA II 0xD6 0x0E36 # THAI CHARACTER SARA UE 0xD7 0x0E37 # THAI CHARACTER SARA UEE 0xD8 0x0E38 # THAI CHARACTER SARA U 0xD9 0x0E39 # THAI CHARACTER SARA UU 0xDA 0x0E3A # THAI CHARACTER PHINTHU 0xDF 0x0E3F # THAI CURRENCY SYMBOL BAHT 0xE0 0x0E40 # THAI CHARACTER SARA E 0xE1 0x0E41 # THAI CHARACTER SARA AE 0xE2 0x0E42 # THAI CHARACTER SARA O 0xE3 0x0E43 # THAI CHARACTER SARA AI MAIMUAN 0xE4 0x0E44 # THAI CHARACTER SARA AI MAIMALAI 0xE5 0x0E45 # THAI CHARACTER LAKKHANGYAO 0xE6 0x0E46 # THAI CHARACTER MAIYAMOK 0xE7 0x0E47 # THAI CHARACTER MAITAIKHU 0xE8 0x0E48 # THAI CHARACTER MAI EK 0xE9 0x0E49 # THAI CHARACTER MAI THO 0xEA 0x0E4A # THAI CHARACTER MAI TRI 0xEB 0x0E4B # THAI CHARACTER MAI CHATTAWA 0xEC 0x0E4C # THAI CHARACTER THANTHAKHAT 0xED 0x0E4D # THAI CHARACTER NIKHAHIT 0xEE 0x0E4E # THAI CHARACTER YAMAKKAN 0xEF 0x0E4F # THAI CHARACTER FONGMAN 0xF0 0x0E50 # THAI DIGIT ZERO 0xF1 0x0E51 # THAI DIGIT ONE 0xF2 0x0E52 # THAI DIGIT TWO 0xF3 0x0E53 # THAI DIGIT THREE 0xF4 0x0E54 # THAI DIGIT FOUR 0xF5 0x0E55 # THAI DIGIT FIVE 0xF6 0x0E56 # THAI DIGIT SIX 0xF7 0x0E57 # THAI DIGIT SEVEN 0xF8 0x0E58 # THAI DIGIT EIGHT 0xF9 0x0E59 # THAI DIGIT NINE 0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU 0xFB 0x0E5B # THAI CHARACTER KHOMUT From lemburg at users.sourceforge.net Tue Oct 25 13:55:04 2005 From: lemburg at users.sourceforge.net (lemburg@users.sourceforge.net) Date: Tue, 25 Oct 2005 13:55:04 +0200 (CEST) Subject: [Python-checkins] python/dist/src/Tools/unicode Makefile,NONE,1.1 Message-ID: <20051025115504.50D1A1E4002@bag.python.org> Update of 
/cvsroot/python/python/dist/src/Tools/unicode In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27707 Added Files: Makefile Log Message: Add Makefile which allows easily rebuilding the charmap codecs. --- NEW FILE: Makefile --- # # Recreate the Python charmap codecs from the Unicode mapping # files available at ftp://ftp.unicode.org/ # #(c) Copyright Marc-Andre Lemburg, 2005. # Licensed to PSF under a Contributor Agreement. # Python binary to use PYTHON = python # Remove tool to use RM = /bin/rm ### Generic targets all: distclean mappings codecs codecs: misc windows iso apple ebcdic custom-mappings ### Mappings mappings: ncftpget -R ftp.unicode.org . Public/MAPPINGS ### Codecs build/: mkdir build misc: build/ $(PYTHON) gencodec.py MAPPINGS/VENDORS/MISC/ build/ $(RM) build/atarist.* $(RM) build/us_ascii_quotes.* $(RM) build/ibmgraph.* $(RM) build/sgml.* $(RM) -f build/readme.* custom-mappings: build/ $(PYTHON) gencodec.py python-mappings/ build/ windows: build/ $(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/WINDOWS/ build/ $(RM) build/cp9* $(RM) -f build/readme.* iso: build/ $(PYTHON) gencodec.py MAPPINGS/ISO8859/ build/iso $(RM) -f build/isoreadme.* apple: build/ $(PYTHON) gencodec.py MAPPINGS/VENDORS/APPLE/ build/mac_ $(RM) build/mac_dingbats.* $(RM) build/mac_japanese.* $(RM) build/mac_chin* $(RM) build/mac_korean.* $(RM) build/mac_symbol.* $(RM) build/mac_corpchar.* $(RM) build/mac_devanaga.* $(RM) build/mac_gaelic.* $(RM) build/mac_gurmukhi.* $(RM) build/mac_hebrew.* $(RM) build/mac_inuit.* $(RM) build/mac_thai.* $(RM) build/mac_ukraine.* $(RM) build/mac_arabic.py $(RM) build/mac_celtic.* $(RM) build/mac_gujarati.* $(RM) build/mac_keyboard.* $(RM) -f build/mac_readme.* ebcdic: build/ $(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/EBCDIC/ build/ $(RM) -f build/readme.* ### Cleanup clean: $(RM) build/* distclean: clean $(RM) -rf MAPPINGS/ From martin.v.loewis at python.org Thu Oct 27 12:21:29 2005 From: martin.v.loewis at python.org (martin.v.loewis@python.org) 
Date: Thu, 27 Oct 2005 12:21:29 +0200 (CEST) Subject: [Python-checkins] commit of r41337 - peps/trunk Message-ID: <20051027102129.9D23F1E4002@bag.python.org> Author: martin.v.loewis Date: Thu Oct 27 12:21:29 2005 New Revision: 41337 Modified: peps/trunk/pep-0347.txt Log: Correct svn+ssh URL. Modified: peps/trunk/pep-0347.txt ============================================================================== --- peps/trunk/pep-0347.txt (original) +++ peps/trunk/pep-0347.txt Thu Oct 27 12:21:29 2005 @@ -213,7 +213,7 @@ The repository should be published at http://svn.python.org/projects. Read-write access should be granted to all current SF committers -through svn+ssh://pythondev at svn.python.org/projects; +through svn+ssh://pythondev at svn.python.org/; read-only anonymous access through WebDAV should also be granted. From martin.v.loewis at python.org Thu Oct 27 20:56:22 2005 From: martin.v.loewis at python.org (martin.v.loewis@python.org) Date: Thu, 27 Oct 2005 20:56:22 +0200 (CEST) Subject: [Python-checkins] commit of r41338 - python/trunk/Doc/inst Message-ID: <20051027185622.C25721E4002@bag.python.org> Author: martin.v.loewis Date: Thu Oct 27 20:56:22 2005 New Revision: 41338 Modified: python/trunk/Doc/inst/inst.tex Log: Update instructions according to Dave Abrahams instructions. Modified: python/trunk/Doc/inst/inst.tex ============================================================================== --- python/trunk/Doc/inst/inst.tex (original) +++ python/trunk/Doc/inst/inst.tex Thu Oct 27 20:56:22 2005 @@ -1030,6 +1030,9 @@ \subsubsection{GNU C / Cygwin / MinGW} +These instructions only apply if you're using a version of Python prior +to 2.4.1 with a MinGW prior to 3.0.0 (with binutils-2.13.90-20030111-1). 
+ This section describes the necessary steps to use Distutils with the GNU C/\Cpp{} compilers in their Cygwin and MinGW distributions.\footnote{Check From brett.cannon at python.org Fri Oct 28 04:53:50 2005 From: brett.cannon at python.org (brett.cannon@python.org) Date: Fri, 28 Oct 2005 04:53:50 +0200 (CEST) Subject: [Python-checkins] commit of r41339 - peps/trunk Message-ID: <20051028025350.2D0681E40B4@bag.python.org> Author: brett.cannon Date: Fri Oct 28 04:53:49 2005 New Revision: 41339 Added: peps/trunk/pep-0352.txt Modified: peps/trunk/pep-0000.txt Log: Check in initial version of PEP 352: Required Superclass for Exceptions. Modified: peps/trunk/pep-0000.txt ============================================================================== --- peps/trunk/pep-0000.txt (original) +++ peps/trunk/pep-0000.txt Fri Oct 28 04:53:49 2005 @@ -107,9 +107,10 @@ S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott S 351 The freeze protocol Warsaw + S 352 Required Superclass for Exceptions GvR, Cannon S 754 IEEE 754 Floating Point Special Values Warnes - Finished PEPs (done, implemented in CVS) + Finished PEPs (done, implemented in Subversion) SF 100 Python Unicode Integration Lemburg IF 160 Python 1.6 Release Schedule Drake @@ -398,6 +399,7 @@ S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott S 351 The freeze protocol Warsaw + S 352 Required Superclass for Exceptions GvR, Cannon SR 666 Reject Foolish Indentation Creighton S 754 IEEE 754 Floating Point Special Values Warnes I 3000 Python 3.0 Plans Kuchling, Cannon Added: peps/trunk/pep-0352.txt ============================================================================== --- (empty file) +++ peps/trunk/pep-0352.txt Fri Oct 28 04:53:49 2005 @@ -0,0 +1,233 @@ +PEP: 352 +Title: Required Superclass for Exceptions +Version: $Revision: 1.5 $ +Last-Modified: $Date: 2005/06/07 13:17:37 $ +Author: Brett Cannon , Guido van Rossum +Status: Draft +Type: Standards Track 
+Content-Type: text/x-rst +Created: 27-Oct-2005 +Post-History: + + +Abstract +======== + +In Python 2.4 and before, any (classic) class can be raised as an +exception. The plan is to allow new-style classes starting in Python +2.5, but this makes the problem worse -- it would mean *any* class (or +instance) can be raised! This is a problem since it prevents any +guarantees to be made about the interface of exceptions. This PEP +proposes introducing a new superclass that all raised objects must +inherit from. Imposing the restriction will allow a standard +interface for exceptions to exist that can be relied upon. + +On might counter that requiring a specific base class for a particular +interface is unPythonic. However, in the specific case of exceptions +there's a good reason (which has generally been agreed to on +python-dev): requiring hierarchy helps code that wants to *catch* +exceptions by making it possible to catch *all* exceptions explicitly +by writing ``except BaseException:`` instead of +``except *:``. [#hierarchy-good]_ + +Introducing a new superclass for exceptions also gives us the chance +to rearrange the exception hierarchy slightly for the better. As it +currently stands, all exceptions in the built-in namespace inherit +from Exception. This is a problem since this includes two exceptions +(KeyboardInterrupt and SystemExit) that are usually meant to signal +that the interpreter should be shut down. Changing it so that these +two exceptions inherit from the common superclass instead of Exception +will make it easy for people to write ``except`` clauses that are not +overreaching and not catch exceptions that should propagate up and +terminate the interpreter. + +This PEP is based on previous work done for PEP 348 [#pep348]_. 
+ + +Requiring a Common Superclass +============================= + +This PEP proposes introducing a new exception named BaseException that +is a new-style class and has a single attribute, ``message``:: + + class BaseException(object): + + """Superclass representing the base of the exception hierarchy. + + Provides a 'message' attribute that contains any argument + passed in during instantiation. + + The 'args' attribute and __getitem__ method are provided for + backwards-compatibility and will be deprecated at some point. + + """ + + def __init__(self, message='', *args): + """Set 'message' and 'args' attribute""" + self.message = message + self.args = (message,) + if args: + self.args += args + + def __str__(self): + """Return the str of 'message'""" + if len(self.args) > 1: + return str(self.args) + else: + return str(self.message) + + def __unicode__(self): + """Return the unicode of 'message'""" + if len(self.args) > 1: + return unicode(self.args) + else: + return unicode(self.message) + + def __repr__(self): + if len(self.args) > 1: + args_repr = "*%s" % self.args + else: + args_repr = repr(self.message) + return "%s(%s)" % (self.__class__.__name__, args_repr) + + def __getitem__(self, index): + """Index into arguments passed in during instantiation. + + Provided for backwards-compatibility and will be + deprecated. + + """ + if index == 0: + return self.message + else: + return self.args[index-1] + + +The ``message`` attribute will contain either the argument passed in +at instantiation of the object or the empty string. The attribute is +meant to act as a common location to store any extra information that +is to be passed along with the exception that goes beyond the location +of the exception within the exception hierarchy and the exception's +type. + +No restriction is placed upon what may be passed in for ``message``. +This provides backwards-compatibility with how the argument passed +into Exception has no restrictions.
+ +The ``args`` attribute is to be deprecated. While allowing multiple +arguments to be passed can be helpful, it is in no way essential. It +also does not make it necessarily clear which argument is going to be +represented by the ``__str__`` method. Restricting to a single +argument keeps the API simple and clear. This also means providing a +``__getitem__`` method is unneeded for exceptions and thus will be +deprecated as well. + +The ``raise`` statement will be changed to require that any object +passed to it must inherit from BaseException. This will make sure +that all exceptions fall within a single hierarchy that is anchored at +BaseException [#hierarchy-good]_. This also guarantees a basic +interface that is inherited from BaseException. The change to +``raise`` will be enforced starting in Python 3.0 (see the `Transition +Plan`_ below). + +With BaseException being the root of the exception hierarchy, +Exception will now inherit from it. + + +Exception Hierarchy Changes +=========================== + +With the exception hierarchy now even more important since it has a +basic root, a change to the existing hierarchy is called for. As it +stands now, if one wants to catch all exceptions that signal an error +*and* do not mean the interpreter should be allowed to exit, you must +specify all but two exceptions specifically in an ``except`` clause. +That is needlessly explicit. This PEP proposes moving +KeyboardInterrupt and SystemExit to inherit directly from +BaseException. + +Doing this makes catching Exception more reasonable. It would catch +only exceptions that signify errors. Exceptions that signal that the +intepreter should exit will not be caught and thus be allowed to +propagate up and allow the interpreter to terminate. + +KeyboardInterrupt has been moved since users typically expect an +application to exit when they press the interrupt key (usually Ctrl-C). +If people have overly broad ``except`` clauses the expected behaviour +does not occur.
+ +SystemExit has been moved for similar reasons. Since the exception is +raised when ``sys.exit()`` is called the interpreter should normally +be allowed to terminate. Unfortunately overly broad ``except`` +clauses can prevent the exit to occur which had been explicitly +requested. + +To make sure that people catch Exception most of the time, various +parts of the documentation and tutorials will need to be updated to +strongly suggest that Exception be what programmers want to use. Bare +``except`` clauses or catching BaseException directly should be +discouraged based on the fact that KeyboardInterrupt and SystemExit +almost always should be allowed to propagate up. + + +Transition Plan +=============== + +Since semantic changes to Python are being proposed, a transition plan +is needed. The goal is to end up with the new semantics being used in +Python 3.0 while providing a smooth transition for 2.x code. All +deprecations mentioned in the plan will lead to the removal of the +semantics starting in the version following the introduction of the +deprecation. + +* Python 2.5 + + - introduce BaseException + + + allow exceptions to be new-style classes + + + all standard exceptions become new-style classes + + - Exception, KeyboardInterrupt, and SystemExit inherit from BaseException + + - deprecate raising string exceptions + +* Python 2.6 + + - deprecate catching string exceptions + +* Python 2.7 + + - deprecate raising exceptions that do not inherit from BaseException + +* Python 2.8 + + - deprecate catching exceptions that do not inherit from BaseException + + - deprecate ``args`` and ``__getitem__`` + + +References +========== + +.. [#pep348] PEP 348 (Exception Reorganization for Python 3.0) + http://www.python.org/peps/pep-0348.html + +.. 
[#hierarchy-good] python-dev Summary for 2004-08-01 through 2004-08-15 + http://www.python.org/dev/summary/2004-08-01_2004-08-15.html#an-exception-is-an-exception-unless-it-doesn-t-inherit-from-exception + + +Copyright +========= + +This document has been placed in the public domain. + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: From neal.norwitz at python.org Fri Oct 28 07:52:23 2005 From: neal.norwitz at python.org (neal.norwitz@python.org) Date: Fri, 28 Oct 2005 07:52:23 +0200 (CEST) Subject: [Python-checkins] commit of r41340 - in python/trunk: Lib Lib/test Misc Message-ID: <20051028055223.15E751E4002@bag.python.org> Author: neal.norwitz Date: Fri Oct 28 07:52:22 2005 New Revision: 41340 Modified: python/trunk/Lib/tarfile.py python/trunk/Lib/test/test_tarfile.py python/trunk/Misc/NEWS Log: Patch #1338314, Bug #1336623: fix tarfile so it can extract REGTYPE directories from tarfiles written by old programs. Will backport. Modified: python/trunk/Lib/tarfile.py ============================================================================== --- python/trunk/Lib/tarfile.py (original) +++ python/trunk/Lib/tarfile.py Fri Oct 28 07:52:22 2005 @@ -739,6 +739,11 @@ tarinfo.devmajor = tarinfo.devmajor = 0 tarinfo.prefix = buf[345:500] + # Some old tar programs represent a directory as a regular + # file with a trailing slash. + if tarinfo.isreg() and tarinfo.name.endswith("/"): + tarinfo.type = DIRTYPE + # The prefix field is used for filenames > 100 in # the POSIX standard. # name = prefix + '/' + name @@ -746,7 +751,7 @@ tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name)) # Directory names should have a '/' at the end. - if tarinfo.isdir() and tarinfo.name[-1:] != "/": + if tarinfo.isdir(): tarinfo.name += "/" return tarinfo @@ -1716,10 +1721,6 @@ # Skip the following data blocks. 
self.offset += self._block(tarinfo.size) - if tarinfo.isreg() and tarinfo.name[:-1] == "/": - # some old tar programs don't know DIRTYPE - tarinfo.type = DIRTYPE - self.members.append(tarinfo) return tarinfo Modified: python/trunk/Lib/test/test_tarfile.py ============================================================================== --- python/trunk/Lib/test/test_tarfile.py (original) +++ python/trunk/Lib/test/test_tarfile.py Fri Oct 28 07:52:22 2005 @@ -144,6 +144,36 @@ "readlines() after seek failed") fobj.close() + def test_old_dirtype(self): + """Test old style dirtype member (bug #1336623). + """ + # Old tars create directory members using a REGTYPE + # header with a "/" appended to the filename field. + + # Create an old tar style directory entry. + filename = tmpname() + tarinfo = tarfile.TarInfo("directory/") + tarinfo.type = tarfile.REGTYPE + + fobj = file(filename, "w") + fobj.write(tarinfo.tobuf()) + fobj.close() + + try: + # Test if it is still a directory entry when + # read back. + tar = tarfile.open(filename) + tarinfo = tar.getmembers()[0] + tar.close() + + self.assert_(tarinfo.type == tarfile.DIRTYPE) + self.assert_(tarinfo.name.endswith("/")) + finally: + try: + os.unlink(filename) + except: + pass + class ReadStreamTest(ReadTest): sep = "|" Modified: python/trunk/Misc/NEWS ============================================================================== --- python/trunk/Misc/NEWS (original) +++ python/trunk/Misc/NEWS Fri Oct 28 07:52:22 2005 @@ -177,6 +177,9 @@ Extension Modules ----------------- +- Patch #1338314, Bug #1336623: fix tarfile so it can extract + REGTYPE directories from tarfiles written by old programs. 
+ - Get bsddb module to build with BSD DB version 3.2 - Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, From neal.norwitz at python.org Fri Oct 28 08:00:52 2005 From: neal.norwitz at python.org (neal.norwitz@python.org) Date: Fri, 28 Oct 2005 08:00:52 +0200 (CEST) Subject: [Python-checkins] commit of r41341 - in python/branches/release24-maint: Lib Lib/test Misc Message-ID: <20051028060052.E54A61E4002@bag.python.org> Author: neal.norwitz Date: Fri Oct 28 08:00:51 2005 New Revision: 41341 Modified: python/branches/release24-maint/Lib/tarfile.py python/branches/release24-maint/Lib/test/test_tarfile.py python/branches/release24-maint/Misc/NEWS Log: Backport: - Patch #1338314, Bug #1336623: fix tarfile so it can extract REGTYPE directories from tarfiles written by old programs. Modified: python/branches/release24-maint/Lib/tarfile.py ============================================================================== --- python/branches/release24-maint/Lib/tarfile.py (original) +++ python/branches/release24-maint/Lib/tarfile.py Fri Oct 28 08:00:51 2005 @@ -688,6 +688,11 @@ tarinfo.devmajor = tarinfo.devmajor = 0 tarinfo.prefix = buf[345:500] + # Some old tar programs represent a directory as a regular + # file with a trailing slash. + if tarinfo.isreg() and tarinfo.name.endswith("/"): + tarinfo.type = DIRTYPE + # The prefix field is used for filenames > 100 in # the POSIX standard. # name = prefix + '/' + name @@ -695,7 +700,7 @@ tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name)) # Directory names should have a '/' at the end. - if tarinfo.isdir() and tarinfo.name[-1:] != "/": + if tarinfo.isdir(): tarinfo.name += "/" return tarinfo @@ -1628,10 +1633,6 @@ # Skip the following data blocks. 
self.offset += self._block(tarinfo.size) - if tarinfo.isreg() and tarinfo.name[:-1] == "/": - # some old tar programs don't know DIRTYPE - tarinfo.type = DIRTYPE - self.members.append(tarinfo) return tarinfo Modified: python/branches/release24-maint/Lib/test/test_tarfile.py ============================================================================== --- python/branches/release24-maint/Lib/test/test_tarfile.py (original) +++ python/branches/release24-maint/Lib/test/test_tarfile.py Fri Oct 28 08:00:51 2005 @@ -134,6 +134,30 @@ "readlines() after seek failed") fobj.close() + def test_old_dirtype(self): + """Test old style dirtype member (bug #1336623). + """ + # Old tars create directory members using a REGTYPE + # header with a "/" appended to the filename field. + + # Create an old tar style directory entry. + filename = tmpname() + tarinfo = tarfile.TarInfo("directory/") + tarinfo.type = tarfile.REGTYPE + + fobj = file(filename, "w") + fobj.write(tarinfo.tobuf()) + fobj.close() + + # Test if it is still a directory entry when + # read back. + tar = tarfile.open(filename) + tarinfo = tar.getmembers()[0] + tar.close() + + self.assert_(tarinfo.type == tarfile.DIRTYPE) + self.assert_(tarinfo.name.endswith("/")) + class ReadStreamTest(ReadTest): sep = "|" Modified: python/branches/release24-maint/Misc/NEWS ============================================================================== --- python/branches/release24-maint/Misc/NEWS (original) +++ python/branches/release24-maint/Misc/NEWS Fri Oct 28 08:00:51 2005 @@ -25,6 +25,9 @@ Extension Modules ----------------- +- Patch #1338314, Bug #1336623: fix tarfile so it can extract + REGTYPE directories from tarfiles written by old programs. 
+ - Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format From fred.drake at python.org Fri Oct 28 16:39:49 2005 From: fred.drake at python.org (fred.drake@python.org) Date: Fri, 28 Oct 2005 16:39:49 +0200 (CEST) Subject: [Python-checkins] commit of r41342 - in python/trunk: Lib Lib/test Mac/Tools/IDE Misc Objects PC/os2emx Tools/msi Message-ID: <20051028143949.A7CA41E4007@bag.python.org> Author: fred.drake Date: Fri Oct 28 16:39:47 2005 New Revision: 41342 Modified: python/trunk/Lib/pydoc.py python/trunk/Lib/test/test_dircache.py python/trunk/Lib/test/test_threadsignals.py python/trunk/Lib/trace.py python/trunk/Mac/Tools/IDE/Wsocket.py python/trunk/Misc/cheatsheet python/trunk/Objects/unicodeobject.c python/trunk/PC/os2emx/dlfcn.c python/trunk/PC/os2emx/dlfcn.h python/trunk/Tools/msi/uisample.py Log: fix typos, mostly in comments Modified: python/trunk/Lib/pydoc.py ============================================================================== --- python/trunk/Lib/pydoc.py (original) +++ python/trunk/Lib/pydoc.py Fri Oct 28 16:39:47 2005 @@ -273,7 +273,7 @@ # Did the error occur before or after the module was found? (exc, value, tb) = info = sys.exc_info() if path in sys.modules: - # An error occured while executing the imported module. + # An error occurred while executing the imported module. raise ErrorDuringImport(sys.modules[path].__file__, info) elif exc is SyntaxError: # A SyntaxError occurred before we could execute the module. 
Modified: python/trunk/Lib/test/test_dircache.py ============================================================================== --- python/trunk/Lib/test/test_dircache.py (original) +++ python/trunk/Lib/test/test_dircache.py Fri Oct 28 16:39:47 2005 @@ -46,7 +46,7 @@ if sys.platform[:3] not in ('win', 'os2'): # Sadly, dircache has the same granularity as stat.mtime, and so - # can't notice any changes that occured within 1 sec of the last + # can't notice any changes that occurred within 1 sec of the last # time it examined a directory. time.sleep(1) self.writeTemp("test1") Modified: python/trunk/Lib/test/test_threadsignals.py ============================================================================== --- python/trunk/Lib/test/test_threadsignals.py (original) +++ python/trunk/Lib/test/test_threadsignals.py Fri Oct 28 16:39:47 2005 @@ -21,7 +21,7 @@ return usr1, usr2, alrm -# The signal handler. Just note that the signal occured and +# The signal handler. Just note that the signal occurred and # from who. 
def handle_signals(sig,frame): signal_blackboard[sig]['tripped'] += 1 Modified: python/trunk/Lib/trace.py ============================================================================== --- python/trunk/Lib/trace.py (original) +++ python/trunk/Lib/trace.py Fri Oct 28 16:39:47 2005 @@ -133,7 +133,7 @@ # the ignore list n = len(mod) # (will not overflow since if the first n characters are the - # same and the name has not already occured, then the size + # same and the name has not already occurred, then the size # of "name" is greater than that of "mod") if mod == modulename[:n] and modulename[n] == '.': self._ignore[modulename] = 1 Modified: python/trunk/Mac/Tools/IDE/Wsocket.py ============================================================================== --- python/trunk/Mac/Tools/IDE/Wsocket.py (original) +++ python/trunk/Mac/Tools/IDE/Wsocket.py Fri Oct 28 16:39:47 2005 @@ -163,7 +163,7 @@ def settotal(int total): gets called when the connection knows the data size def setcurrent(int current): gets called when some new data has arrived def done(): gets called when the transaction is complete - def error(type, value, tb): gets called wheneven an error occured + def error(type, value, tb): gets called wheneven an error occurred """ def __init__(self, settotal_func, setcurrent_func, done_func, error_func): Modified: python/trunk/Misc/cheatsheet ============================================================================== --- python/trunk/Misc/cheatsheet (original) +++ python/trunk/Misc/cheatsheet Fri Oct 28 16:39:47 2005 @@ -1308,7 +1308,7 @@ tb_next (frame/None, R/O): next level in stack trace (toward the frame where the exception occurred) tb_frame (frame, R/O): execution frame of the current level - tb_lineno (int, R/O): line number where the exception occured + tb_lineno (int, R/O): line number where the exception occurred tb_lasti (int, R/O): precise instruction (index into bytecode) Slices: @@ -1597,7 +1597,7 @@ maxsplit=0]]) join(words[, sep=' Concatenate a 
list or tuple of words with ']) interveningseparators; inverse of split. -replace(s, old, Returns a copy of string with all occurences of +replace(s, old, Returns a copy of string with all occurrences of new[, maxsplit=0] substring replaced by . Limits to firstsubstitutions if specified. strip(s) Return a string that is (a copy of) without leadingand @@ -1620,7 +1620,7 @@ $ matches end of the string (of every line in MULTILINE mode) * 0 or more of preceding regular expression (as many as possible) + 1 or more of preceding regular expression (as many as possible) -? 0 or 1 occurence of preceding regular expression +? 0 or 1 occurrence of preceding regular expression *?, +?, ?? Same as *, + and ? but matches as few characters as possible {m,n} matches from m to n repetitions of preceding RE {m,n}? idem, attempting to match as few repetitions as possible Modified: python/trunk/Objects/unicodeobject.c ============================================================================== --- python/trunk/Objects/unicodeobject.c (original) +++ python/trunk/Objects/unicodeobject.c Fri Oct 28 16:39:47 2005 @@ -734,7 +734,7 @@ /* error handling callback helper: build arguments, call the callback and check the arguments, - if no exception occured, copy the replacement to the output + if no exception occurred, copy the replacement to the output and adjust various state variables. return 0 on success, -1 on error */ @@ -2987,7 +2987,7 @@ /* Lookup the character ch in the mapping. If the character can't be found, Py_None is returned (or NULL, if another - error occured). */ + error occurred). 
*/ static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping) { PyObject *w = PyInt_FromLong((long)c); Modified: python/trunk/PC/os2emx/dlfcn.c ============================================================================== --- python/trunk/PC/os2emx/dlfcn.c (original) +++ python/trunk/PC/os2emx/dlfcn.c Fri Oct 28 16:39:47 2005 @@ -216,7 +216,7 @@ } } -/* return a string describing last occured dl error */ +/* return a string describing last occurred dl error */ char *dlerror() { return dlerr; Modified: python/trunk/PC/os2emx/dlfcn.h ============================================================================== --- python/trunk/PC/os2emx/dlfcn.h (original) +++ python/trunk/PC/os2emx/dlfcn.h Fri Oct 28 16:39:47 2005 @@ -45,7 +45,7 @@ /* free dynamicaly-linked library */ int dlclose(void *handle); -/* return a string describing last occured dl error */ +/* return a string describing last occurred dl error */ char *dlerror(void); #endif /* !_DLFCN_H */ Modified: python/trunk/Tools/msi/uisample.py ============================================================================== --- python/trunk/Tools/msi/uisample.py (original) +++ python/trunk/Tools/msi/uisample.py Fri Oct 28 16:39:47 2005 @@ -1,3 +1,4 @@ + import msilib,os;dirname=os.path.dirname(__file__) AdminExecuteSequence = [ (u'InstallValidate', None, 1400), @@ -1393,7 +1394,7 @@ (1932, u'The Windows Installer service cannot update the protected Windows file [2]. {{Package version: [3], OS Protected version: [4], SFP Error: [5]}}'), (1933, u'The Windows Installer service cannot update one or more protected Windows files. {{SFP Error: [2]. List of protected files:\\r\\n[3]}}'), (1934, u'User installations are disabled via policy on the machine.'), -(1935, u'An error occured during the installation of assembly component [2]. HRESULT: [3]. {{assembly interface: [4], function: [5], assembly name: [6]}}'), +(1935, u'An error occurred during the installation of assembly component [2]. HRESULT: [3]. 
{{assembly interface: [4], function: [5], assembly name: [6]}}'), ] tables=['AdminExecuteSequence', 'AdminUISequence', 'AdvtExecuteSequence', 'BBControl', 'Billboard', 'Binary', 'CheckBox', 'Property', 'ComboBox', 'Control', 'ListBox', 'ActionText', 'ControlCondition', 'ControlEvent', 'Dialog', 'EventMapping', 'InstallExecuteSequence', 'InstallUISequence', 'ListView', 'RadioButton', 'TextStyle', 'UIText', '_Validation', 'Error'] From brett.cannon at python.org Fri Oct 28 21:35:58 2005 From: brett.cannon at python.org (brett.cannon@python.org) Date: Fri, 28 Oct 2005 21:35:58 +0200 (CEST) Subject: [Python-checkins] commit of r41343 - peps/trunk Message-ID: <20051028193558.77BDF1E407C@bag.python.org> Author: brett.cannon Date: Fri Oct 28 21:35:57 2005 New Revision: 41343 Modified: peps/trunk/pep-0352.txt Log: Merge in suggestions from Neal Norwitz. - Simplified __init__ and fixed __getitem__ - Added another way to catch all exceptions sans KeyboardInterrupt and SystemExit as Python stands now (does not invalidate argument that it is needlessly explicit) - Added a diagram of how the exception hierarchy will look - Small grammar and clarification fixes Modified: peps/trunk/pep-0352.txt ============================================================================== --- peps/trunk/pep-0352.txt (original) +++ peps/trunk/pep-0352.txt Fri Oct 28 21:35:57 2005 @@ -22,10 +22,10 @@ inherit from. Imposing the restriction will allow a standard interface for exceptions to exist that can be relied upon. -On might counter that requiring a specific base class for a particular -interface is unPythonic. However, in the specific case of exceptions -there's a good reason (which has generally been agreed to on -python-dev): requiring hierarchy helps code that wants to *catch* +One might counter that requiring a specific base class for a +particular interface is unPythonic.
However, in the specific case of +exceptions there's a good reason (which has generally been agreed to +on python-dev): requiring hierarchy helps code that wants to *catch* exceptions by making it possible to catch *all* exceptions explicitly by writing ``except BaseException:`` instead of ``except *:``. [#hierarchy-good]_ @@ -48,7 +48,8 @@ ============================= This PEP proposes introducing a new exception named BaseException that -is a new-style class and has a single attribute, ``message``:: +is a new-style class and has a single attribute, ``message`` (that +will cause the deprecation of the existing ``args`` attribute):: class BaseException(object): @@ -65,9 +66,7 @@ def __init__(self, message='', *args): """Set 'message' and 'args' attribute""" self.message = message - self.args = (message,) - if args: - self.args += args + self.args = (message,) + args def __str__(self): """Return the str of 'message'""" @@ -97,10 +96,7 @@ deprecated. """ - if index == 0: - return self.message - else: - return self.args[index-1] + return self.args[index] The ``message`` attribute will contain either the argument passed in @@ -114,13 +110,13 @@ This provides backwards-compatibility with how the argument passed into Exception has no restrictions. -The ``args`` attribute is to be deprecated. While allowing multiple +The ``args`` attribute is deprecated. While allowing multiple arguments to be passed can be helpful, it is in no way essential. It -also does not make it necessarily clear which argument is going to be -represented by the ``__str__`` method. Restricting to a single -argument keeps the API simple and clear. This also means providing a -``__getitem__`` method is unneeded for exceptions and thus will be -deprecated as well. +also does not make it clear which argument is going to be represented +by the ``__str__`` method. Restricting initialization to accepting a +single argument keeps the API simple and clear. 
This also means +providing a ``__getitem__`` method is unneeded for exceptions and thus +will be deprecated as well. The ``raise`` statement will be changed to require that any object passed to it must inherit from BaseException. This will make sure @@ -141,11 +137,27 @@ basic root, a change to the existing hierarchy is called for. As it stands now, if one wants to catch all exceptions that signal an error *and* do not mean the interpreter should be allowed to exit, you must -specify all but two exceptions specifically in an ``except`` clause. +specify all but two exceptions specifically in an ``except`` clause +or catch the two exceptions separately and then re-raise them and +have all other exceptions fall through to a bare ``except`` clause:: + + except (KeyboardInterrupt, SystemExit): + raise + except: + ... + That is needlessly explicit. This PEP proposes moving KeyboardInterrupt and SystemExit to inherit directly from BaseException. +:: + + - BaseException + |- KeyboardInterrupt + |- SystemExit + |- Exception + |- (all other current built-in exceptions) + Doing this makes catching Exception more reasonable. It would catch only exceptions that signify errors. Exceptions that signal that the intepreter should exit will not be caught and thus be allowed to From martin.v.loewis at python.org Sat Oct 29 01:05:19 2005 From: martin.v.loewis at python.org (martin.v.loewis@python.org) Date: Sat, 29 Oct 2005 01:05:19 +0200 (CEST) Subject: [Python-checkins] commit of r41344 - peps/trunk Message-ID: <20051028230519.DDD5F1E4007@bag.python.org> Author: martin.v.loewis Date: Sat Oct 29 01:05:19 2005 New Revision: 41344 Modified: peps/trunk/pep-0001-1.png (props changed) Log: Remove bogus properties on png file. 
From martin.v.loewis at python.org Sat Oct 29 01:08:02 2005 From: martin.v.loewis at python.org (martin.v.loewis@python.org) Date: Sat, 29 Oct 2005 01:08:02 +0200 (CEST) Subject: [Python-checkins] commit of r41345 - peps/trunk Message-ID: <20051028230802.670751E4007@bag.python.org> Author: martin.v.loewis Date: Sat Oct 29 01:08:02 2005 New Revision: 41345 Modified: peps/trunk/pep-0001-1.png Log: Reimport png from CVS. Modified: peps/trunk/pep-0001-1.png ============================================================================== Binary files. No diff available. From vinay.sajip at python.org Sat Oct 29 02:40:15 2005 From: vinay.sajip at python.org (vinay.sajip@python.org) Date: Sat, 29 Oct 2005 02:40:15 +0200 (CEST) Subject: [Python-checkins] commit of r41346 - python/trunk/Doc/lib Message-ID: <20051029004015.AB23C1E4007@bag.python.org> Author: vinay.sajip Date: Sat Oct 29 02:40:15 2005 New Revision: 41346 Modified: python/trunk/Doc/lib/liblogging.tex Log: Minor update to HTTPHandler documentation to indicate that host:port can be used. Modified: python/trunk/Doc/lib/liblogging.tex ============================================================================== --- python/trunk/Doc/lib/liblogging.tex (original) +++ python/trunk/Doc/lib/liblogging.tex Sat Oct 29 02:40:15 2005 @@ -1253,7 +1253,9 @@ \begin{classdesc}{HTTPHandler}{host, url\optional{, method}} Returns a new instance of the \class{HTTPHandler} class. The instance is initialized with a host address, url and HTTP method. -If no \var{method} is specified, \samp{GET} is used. +The \var{host} can be of the form \code{host:port}, should you need to +use a specific port number. If no \var{method} is specified, \samp{GET} +is used. 
\end{classdesc} \begin{methoddesc}{emit}{record} From tim.peters at python.org Sat Oct 29 04:33:18 2005 From: tim.peters at python.org (tim.peters@python.org) Date: Sat, 29 Oct 2005 04:33:18 +0200 (CEST) Subject: [Python-checkins] commit of r41347 - python/trunk/Include Message-ID: <20051029023318.C19301E4007@bag.python.org> Author: tim.peters Date: Sat Oct 29 04:33:18 2005 New Revision: 41347 Modified: python/trunk/Include/unicodeobject.h Log: _PyUnicode_IsWhitespace(), _PyUnicode_IsLinebreak(): Changed the declarations to match the definitions. Don't know why they differed; MSVC warned about it; don't know why only these two functions use "const". Someone who does may want to do something saner ;-). Modified: python/trunk/Include/unicodeobject.h ============================================================================== --- python/trunk/Include/unicodeobject.h (original) +++ python/trunk/Include/unicodeobject.h Sat Oct 29 04:33:18 2005 @@ -1152,11 +1152,11 @@ ); PyAPI_FUNC(int) _PyUnicode_IsWhitespace( - Py_UNICODE ch /* Unicode character */ + const Py_UNICODE ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsLinebreak( - Py_UNICODE ch /* Unicode character */ + const Py_UNICODE ch /* Unicode character */ ); PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( From brett.cannon at python.org Sat Oct 29 05:22:31 2005 From: brett.cannon at python.org (brett.cannon@python.org) Date: Sat, 29 Oct 2005 05:22:31 +0200 (CEST) Subject: [Python-checkins] commit of r41348 - peps/trunk Message-ID: <20051029032231.85EE11E4007@bag.python.org> Author: brett.cannon Date: Sat Oct 29 05:22:31 2005 New Revision: 41348 Modified: peps/trunk/pep-0352.txt Log: Update the code for BaseException to have 'args' be more backwards-compatible. Also reformat some methods to use the conditional operator to make the code simpler. Also moved the deprecation of 'args' and '__getitem__' to Python 2.9 so that it won't be removed during the 2.x series. 
Modified: peps/trunk/pep-0352.txt ============================================================================== --- peps/trunk/pep-0352.txt (original) +++ peps/trunk/pep-0352.txt Sat Oct 29 05:22:31 2005 @@ -66,27 +66,26 @@ def __init__(self, message='', *args): """Set 'message' and 'args' attribute""" self.message = message - self.args = (message,) + args + self.args = ((message,) + args + if message != '' + else tuple()) def __str__(self): """Return the str of 'message'""" - if len(self.args) > 1: - return str(self.args) - else: - return str(self.message) + return str(self.message + if not self.args + else self.args) def __unicode__(self): """Return the unicode of 'message'""" - if len(self.args) > 1: - return unicode(self.args) - else: - return unicode(self.message) + return unicode(self.message + if not self.args + else self.args) def __repr__(self): - if len(self.args) > 1: - args_repr = "*%s" % self.args - else: - args_repr = repr(self.message) + args_repr = (repr(self.message) + if not self.args + else "*%r" % self.args) return "%s(%s)" % (self.__class__.__name__, args_repr) def __getitem__(self, index): @@ -190,7 +189,8 @@ Python 3.0 while providing a smooth transition for 2.x code. All deprecations mentioned in the plan will lead to the removal of the semantics starting in the version following the introduction of the -deprecation. +deprecation and the raising of a DeprecationWarning for the version +specifically listed. 
* Python 2.5 @@ -216,6 +216,9 @@ - deprecate catching exceptions that do not inherit from BaseException + +* Python 2.9 + - deprecate ``args`` and ``__getitem__`` From brett.cannon at python.org Sat Oct 29 05:26:52 2005 From: brett.cannon at python.org (brett.cannon@python.org) Date: Sat, 29 Oct 2005 05:26:52 +0200 (CEST) Subject: [Python-checkins] commit of r41349 - peps/trunk Message-ID: <20051029032652.29C951E4007@bag.python.org> Author: brett.cannon Date: Sat Oct 29 05:26:51 2005 New Revision: 41349 Modified: peps/trunk/pep-0352.txt Log: Fix __init__ for BaseException to be completely backwards-compatible for 'args'. Modified: peps/trunk/pep-0352.txt ============================================================================== --- peps/trunk/pep-0352.txt (original) +++ peps/trunk/pep-0352.txt Sat Oct 29 05:26:51 2005 @@ -63,12 +63,10 @@ """ - def __init__(self, message='', *args): + def __init__(self, *args): """Set 'message' and 'args' attribute""" - self.message = message - self.args = ((message,) + args - if message != '' - else tuple()) + self.args = args + self.message = args[0] if args else '' def __str__(self): """Return the str of 'message'""" From nick.coghlan at python.org Sat Oct 29 08:08:13 2005 From: nick.coghlan at python.org (nick.coghlan@python.org) Date: Sat, 29 Oct 2005 08:08:13 +0200 (CEST) Subject: [Python-checkins] commit of r41350 - peps/trunk Message-ID: <20051029060813.4DB5C1E4007@bag.python.org> Author: nick.coghlan Date: Sat Oct 29 08:08:12 2005 New Revision: 41350 Modified: peps/trunk/pep-0343.txt Log: Update with outcome of recent python-dev discussions Modified: peps/trunk/pep-0343.txt ============================================================================== --- peps/trunk/pep-0343.txt (original) +++ peps/trunk/pep-0343.txt Sat Oct 29 08:08:12 2005 @@ -7,17 +7,17 @@ Type: Standards Track Content-Type: text/plain Created: 13-May-2005 -Post-History: 2-Jun-2005 +Post-History: 2-Jun-2005, 16-Oct-2005, 29-Oct-2005 Abstract This PEP 
adds a new statement "with" to the Python language to make it possible to factor out standard uses of try/finally statements. - The PEP has been approved in principle by the BDFL, but there are + The PEP was approved in principle by the BDFL, but there were still a couple of implementation details to be worked out (see the - section on Open Issues). It's been reverted to Draft status until - those issues have been settled to Guido's satisfaction. + section on Resolved Issues). It's still at Draft status until + Guido gives a final blessing to the updated PEP. Author's Note @@ -225,7 +225,7 @@ The translation of the above statement is: - abc = (EXPR).__with__() + abc = (EXPR).__context__() exc = (None, None, None) VAR = abc.__enter__() try: @@ -241,14 +241,18 @@ accessible to the user; they will most likely be implemented as special registers or stack positions. - The call to the __with__() method serves a similar purpose to that - of the __iter__() method of iterator and iterables. An object with - with simple state requirements (such as threading.RLock) may provide - its own __enter__() and __exit__() methods, and simply return - 'self' from its __with__ method. On the other hand, an object with - more complex state requirements (such as decimal.Context) may - return a distinct context manager object each time its __with__ - method is invoked. + The above translation is fairly literal - if any of the relevant + methods are not found as expected, the interpreter will raise + AttributeError. + + The call to the __context__() method serves a similar purpose to + that of the __iter__() method of iterator and iterables. An + object with with simple state requirements (such as + threading.RLock) may provide its own __enter__() and __exit__() + methods, and simply return 'self' from its __context__ method. 
On + the other hand, an object with more complex state requirements + (such as decimal.Context) may return a distinct context manager + object each time its __context__ method is invoked. If the "as VAR" part of the syntax is omitted, the "VAR =" part of the translation is omitted (but abc.__enter__() is still called). @@ -284,12 +288,12 @@ that makes it possible to use a generator that yields exactly once to control a with-statement. Here's a sketch of such a decorator: - class GeneratorContext(object): + class GeneratorContextManager(object): def __init__(self, gen): self.gen = gen - def __with__(self): + def __context__(self): return self def __enter__(self): @@ -314,14 +318,14 @@ else: raise RuntimeError("generator caught exception") - def context(func): + def contextmanager(func): def helper(*args, **kwds): - return GeneratorContext(func(*args, **kwds)) + return GeneratorContextManager(func(*args, **kwds)) return helper This decorator could be used as follows: - @context + @contextmanager def opening(filename): f = open(filename) # IOError is untouched by GeneratorContext try: @@ -329,16 +333,18 @@ finally: f.close() # Ditto for errors here (however unlikely) - A robust implementation of this decorator should be made part of - the standard library. Refer to Open Issues regarding its name and - location. + A robust builtin implementation of this decorator will be made + part of the standard library. Just as generator-iterator functions are very useful for writing __iter__() methods for iterables, generator-context functions will - be very useful for writing __with__() methods for contexts. It is - proposed that the invocation of the "context" decorator be - considered implicit for generator functions used as __with__() - methods (again, refer to the Open Issues section). + be very useful for writing __context__() methods for contexts. + These methods will still need to be decorated using the + contextmanager decorator. 
To ensure an obvious error message if the + decorator is left out, generator-iterator objects will NOT be given + a native context - if you want to ensure a generator is closed + promptly, use something similar to the duck-typed "closing" context + manager in the examples. Optional Extensions @@ -371,6 +377,15 @@ second with-statement calls f.__enter__() again. A similar error can be raised if __enter__ is invoked on a closed file object. + For Python 2.5, the following candidates have been identified for + native context managers: + - file + - decimal.Context + - thread.LockType + - threading.Lock + - threading.RLock + - threading.Condition + Standard Terminology Discussions about iterators and iterables are aided by the standard @@ -384,7 +399,7 @@ This PEP proposes that the protocol used by the with statement be known as the "context management protocol", and that objects that implement that protocol be known as "context managers". The term - "context" then encompasses all objects with a __with__() method + "context" then encompasses all objects with a __context__() method that returns a context manager (this means that all context managers are contexts, but not all contexts are context managers). @@ -395,50 +410,13 @@ In cases where the general term "context" would be ambiguous, it can be made explicit by expanding it to "manageable context". -Open Issues - - Discussion on python-dev revealed some open issues. These are listed - here and will be resolved either by consensus on python-dev or by - BDFL fiat. - - 1. The name of the decorator used to convert a generator-iterator - function into a generator-context function is still to be - finalised. - The proposal in this PEP is that it be called simply "context" - with the following reasoning: - - A "generator function" is an undecorated function containing - the 'yield' keyword, and the objects produced by - such functions are "generator-iterators". 
The term - "generator" may refer to either a generator function or a - generator-iterator depending on the situation. - - A "generator context function" is a generator function to - which the "context" decorator is applied and the objects - produced by such functions are "generator-context-managers". - The term "generator context" may refer to either a generator - context function or a generator-context-manager depending on - the situation. - - 2. Should the decorator to convert a generator function into a - generator context function be a builtin, or located elsewhere in - the standard library? This PEP suggests that it should be a - builtin, as generator context functions are the recommended way - of writing new context managers. - - 3. Should a generator function used to implement a __with__ method - always be considered to be a generator context function, without - requiring the context decorator? This PEP suggests that it - should, as applying a decorator to a slot just looks strange, - and omitting the decorator would be a source of obscure bugs. - The __new__ slot provides some precedent for special casing of - certain slots when processing slot methods. - Resolved Issues - The following issues were resolved either by BDFL fiat, consensus on - python-dev, or a simple lack of objection to proposals in the - original version of this PEP. + The following issues were resolved either by BDFL approval, + consensus on python-dev, or a simple lack of objection to + proposals in the original version of this PEP. - 1. The __exit__() method of the GeneratorContext class + 1. The __exit__() method of the GeneratorContextManager class catches StopIteration and considers it equivalent to re-raising the exception passed to throw(). Is allowing StopIteration right here? @@ -458,10 +436,10 @@ finally-clause (the one implicit in the with-statement) which re-raises the original exception anyway. - 2. 
What exception should GeneratorContext raise when the underlying - generator-iterator misbehaves? The following quote is the reason - behind Guido's choice of RuntimeError for both this and for the - generator close() method in PEP 342 (from [8]): + 2. What exception should GeneratorContextManager raise when the + underlying generator-iterator misbehaves? The following quote is + the reason behind Guido's choice of RuntimeError for both this + and for the generator close() method in PEP 342 (from [8]): "I'd rather not introduce a new exception class just for this purpose, since it's not an exception that I want people to catch: @@ -477,24 +455,27 @@ on python-dev [4] settled on the term "context manager" for objects which provide __enter__ and __exit__ methods, and "context management protocol" for the protocol itself. With the - addition of the __with__ method to the protocol, a natural - extension is to call all objects which provide a __with__ method - "contexts" (or "manageable contexts" in situations where the - general term "context" would be ambiguous). + addition of the __context__ method to the protocol, a natural + extension is to call all objects which provide a __context__ + method "contexts" (or "manageable contexts" in situations where + the general term "context" would be ambiguous). This is now documented in the "Standard Terminology" section. 4. The originally approved version of this PEP did not include a - __with__ method - the method was only added to the PEP after + __context__ method - the method was only added to the PEP after Jason Orendorff pointed out the difficulty of writing appropriate __enter__ and __exit__ methods for decimal.Context [5]. This approach allows a class to define a native context manager using generator syntax. It also allows a class to use an existing independent context manager as its native context manager by applying the independent context manager to 'self' in - its __with__ method. 
It even allows a class written in C to use - a generator context manager written in Python. - The __with__ method parallels the __iter__ method which forms + its __context__ method. It even allows a class written in C to + use a generator context manager written in Python. + The __context__ method parallels the __iter__ method which forms part of the iterator protocol. + An earlier version of this PEP called this the __with__ method. + This was later changed to match the name of the protocol rather + than the keyword for the statement [9]. 5. The suggestion was made by Jason Orendorff that the __enter__ and __exit__ methods could be removed from the context @@ -514,18 +495,56 @@ works without having to first understand the mechanics of how generator context managers are implemented. + 6. The decorator to make a context manager from a generator will be + a builtin called "contextmanager". The shorter term "context" was + considered too ambiguous and potentially confusing [9]. + The different flavours of generators can then be described as: + - A "generator function" is an undecorated function containing + the 'yield' keyword, and the objects produced by + such functions are "generator-iterators". The term + "generator" may refer to either a generator function or a + generator-iterator depending on the situation. + - A "generator context function" is a generator function to + which the "contextmanager" decorator is applied and the + objects produced by such functions are "generator-context- + managers". The term "generator context" may refer to either a + generator context function or a generator-context-manager + depending on the situation. + + 7. A generator function used to implement a __context__ method will + need to be decorated with the contextmanager decorator in order + to have the correct behaviour. 
Otherwise, you will get an + AttributeError when using the class in a with statement, as + normal generator-iterators will NOT have __enter__ or __exit__ + methods. + Getting deterministic closure of generators will require a + separate context manager such as the closing example below. + As Guido put it, "too much magic is bad for your health" [10]. + + 8. It is fine to raise AttributeError instead of TypeError if the + relevant methods aren't present on a class involved in a with + statement. The fact that the abstract object C API raises + TypeError rather than AttributeError is an accident of history, + rather than a deliberate design decision [11]. + Examples - (The generator based examples assume PEP 342 is implemented. Also, - some of the examples are likely to be unnecessary in practice, as - the appropriate objects, such as threading.RLock, will be able to - be used directly in with statements) + The generator based examples rely on PEP 342. Also, some of the + examples are likely to be unnecessary in practice, as the + appropriate objects, such as threading.RLock, will be able to be + used directly in with statements. + + The tense used in the names of the example context managers is not + arbitrary. Past tense ("-ed") is used when the name refers to an + action which is done in the __enter__ method and undone in the + __exit__ method. Progressive tense ("-ing") is used when the name + refers to an action which is to be done in the __exit__ method. 1. A template for ensuring that a lock, acquired at the start of a block, is released when the block is left: - @context - def locking(lock): + @contextmanager + def locked(lock): lock.acquire() try: yield @@ -534,20 +553,20 @@ Used as follows: - with locking(myLock): + with locked(myLock): # Code here executes with myLock held. The lock is # guaranteed to be released when the block is left (even # if via return or by an uncaught exception). 
- PEP 319 gives a use case for also having an unlocking() + PEP 319 gives a use case for also having an unlocked() template; this can be written very similarly (just swap the acquire() and release() calls). 2. A template for opening a file that ensures the file is closed when the block is left: - @context - def opening(filename, mode="r"): + @contextmanager + def opened(filename, mode="r"): f = open(filename, mode) try: yield f @@ -556,15 +575,15 @@ Used as follows: - with opening("/etc/passwd") as f: + with opened("/etc/passwd") as f: for line in f: print line.rstrip() 3. A template for committing or rolling back a database transaction: - @context - def transactional(db): + @contextmanager + def transaction(db): db.begin() try: yield None @@ -575,10 +594,10 @@ 4. Example 1 rewritten without a generator: - class locking: + class locked: def __init__(self, lock): self.lock = lock - def __with__(self, lock): + def __context__(self): return self def __enter__(self): self.lock.acquire() @@ -586,13 +605,14 @@ self.lock.release() (This example is easily modified to implement the other - examples; it shows that is is easy to avoid the need for a - generator if no special state needs to be preserved.) + relatively stateless examples; it shows that it is easy to avoid + the need for a generator if no special state needs to be + preserved.) 5. Redirect stdout temporarily: - @context - def redirecting_stdout(new_stdout): + @contextmanager + def stdout_redirected(new_stdout): save_stdout = sys.stdout sys.stdout = new_stdout try: @@ -602,18 +622,18 @@ Used as follows: - with opening(filename, "w") as f: - with redirecting_stdout(f): + with opened(filename, "w") as f: + with stdout_redirected(f): print "Hello world" This isn't thread-safe, of course, but neither is doing this same dance manually. In single-threaded programs (for example, in scripts) it is a popular way of doing things. - 6. A variant on opening() that also returns an error condition: + 6. 
A variant on opened() that also returns an error condition: - @context - def opening_w_error(filename, mode="r"): + @contextmanager + def opened_w_error(filename, mode="r"): try: f = open(filename, mode) except IOError, err: @@ -626,7 +646,7 @@ Used as follows: - with opening_w_error("/etc/passwd", "a") as (f, err): + with opened_w_error("/etc/passwd", "a") as (f, err): if err: print "IOError:", err else: @@ -637,7 +657,7 @@ import signal - with signal.blocking(): + with signal.blocked(): # code executed without worrying about signals An optional argument might be a list of signals to be blocked; @@ -679,7 +699,8 @@ 9. Here's a proposed native context manager for decimal.Context: # This would be a new decimal.Context method - def __with__(self): + @contextmanager + def __context__(self): # We set the thread context to a copy of this context # to ensure that changes within the block are kept # local to the block. This also gives us thread safety @@ -710,7 +731,7 @@ 10. A generic "object-closing" template: - @context + @contextmanager def closing(obj): try: yield obj @@ -737,6 +758,78 @@ for datum in data: process(datum) + 11. Native contexts for objects with acquire/release methods: + + # This would be a new method of e.g., threading.RLock + def __context__(self): + return locked(self) + + def released(self): + return unlocked(self) + + Sample usage: + + with my_lock: + # Operations with the lock held + with my_lock.released(): + # Operations without the lock + # e.g. blocking I/O + # Lock is held again here + + 12. 
A "nested" context manager that automatically nests the + supplied contexts from left-to-right to avoid excessive + indentation: + + class nested(object): + def __init__(*contexts): + self.contexts = contexts + self.entered = None + + def __context__(self): + return self + + def __enter__(self): + if self.entered is not None: + raise RuntimeError("Context is not reentrant") + self.entered = deque() + vars = [] + try: + for context in self.contexts: + mgr = context.__context__() + vars.append(mgr.__enter__()) + self.entered.appendleft(mgr) + except: + self.__exit__(*sys.exc_info()) + raise + return vars + + def __exit__(self, *exc_info): + # Behave like nested with statements + # first in, last out + # New exceptions override old ones + ex = exc_info + for mgr in self.entered: + try: + mgr.__exit__(*ex) + except: + ex = sys.exc_info() + self.entered = None + if ex is not exc_info: + raise ex[0], ex[1], ex[2] + + Sample usage: + + with nested(a, b, c) as (x, y, z): + # Perform operation + + Is equivalent to: + + with a as x: + with b as y: + with c as z: + # Perform operation + + References [1] http://blogs.msdn.com/oldnewthing/archive/2005/01/06/347666.aspx @@ -760,6 +853,15 @@ [8] http://mail.python.org/pipermail/python-dev/2005-June/054064.html + [9] + http://mail.python.org/pipermail/python-dev/2005-October/057520.html + + [10] + http://mail.python.org/pipermail/python-dev/2005-October/057535.html + + [11] + http://mail.python.org/pipermail/python-dev/2005-October/057625.html + Copyright This document has been placed in the public domain. 
From neil.schemenauer at python.org Sat Oct 29 16:36:03 2005 From: neil.schemenauer at python.org (neil.schemenauer@python.org) Date: Sat, 29 Oct 2005 16:36:03 +0200 (CEST) Subject: [Python-checkins] commit of r41351 - peps/trunk Message-ID: <20051029143603.141A11E4009@bag.python.org> Author: neil.schemenauer Date: Sat Oct 29 16:36:02 2005 New Revision: 41351 Modified: peps/trunk/pep-0000.txt peps/trunk/pep-0349.txt Log: Defer PEP 349. Modified: peps/trunk/pep-0000.txt ============================================================================== --- peps/trunk/pep-0000.txt (original) +++ peps/trunk/pep-0000.txt Sat Oct 29 16:36:02 2005 @@ -104,7 +104,6 @@ S 344 Exception Chaining and Embedded Tracebacks Yee S 345 Metadata for Python Software Packages 1.2 Jones P 347 Migrating the Python CVS to Subversion von Löwis - S 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott S 351 The freeze protocol Warsaw S 352 Required Superclass for Exceptions GvR, Cannon @@ -220,6 +219,7 @@ SR 340 Anonymous Block Statements GvR SR 346 User Defined ("with") Statements Coghlan SR 348 Exception Reorganization for Python 3.0 Cannon + SD 349 Allow str() to return unicode strings Schemenauer SR 666 Reject Foolish Indentation Creighton @@ -396,7 +396,7 @@ SR 346 User Defined ("with") Statements Coghlan P 347 Migrating the Python CVS to Subversion von Löwis SR 348 Exception Reorganization for Python 3.0 Cannon - S 349 Allow str() to return unicode strings Schemenauer + SD 349 Allow str() to return unicode strings Schemenauer I 350 Codetags Elliott S 351 The freeze protocol Warsaw S 352 Required Superclass for Exceptions GvR, Cannon Modified: peps/trunk/pep-0349.txt ============================================================================== --- peps/trunk/pep-0349.txt (original) +++ peps/trunk/pep-0349.txt Sat Oct 29 16:36:02 2005 @@ -3,7 +3,7 @@ Version: $Revision$ Last-Modified: $Date$ Author: Neil Schemenauer -Status: Draft +Status: Deferred Type: Standards
Track Content-Type: text/plain Created: 02-Aug-2005 From martin.v.loewis at python.org Sat Oct 29 21:40:22 2005 From: martin.v.loewis at python.org (martin.v.loewis@python.org) Date: Sat, 29 Oct 2005 21:40:22 +0200 (CEST) Subject: [Python-checkins] commit of r41352 - in python/trunk: . Lib Lib/distutils Lib/distutils/command Lib/encodings Message-ID: <20051029194022.470D61E40B4@bag.python.org> Author: martin.v.loewis Date: Sat Oct 29 21:40:21 2005 New Revision: 41352 Modified: python/trunk/ (props changed) python/trunk/.cvsignore python/trunk/Lib/ (props changed) python/trunk/Lib/distutils/ (props changed) python/trunk/Lib/distutils/command/ (props changed) python/trunk/Lib/encodings/ (props changed) Log: Add *.pyc to svn:ignore. Add libpython*.a to .cvsignore and svn:ignore. Modified: python/trunk/.cvsignore ============================================================================== --- python/trunk/.cvsignore (original) +++ python/trunk/.cvsignore Sat Oct 29 21:40:21 2005 @@ -9,3 +9,4 @@ Makefile.pre platform pyconfig.h +libpython*.a From fdrake at acm.org Sat Oct 29 21:50:11 2005 From: fdrake at acm.org (Fred L. Drake, Jr.) Date: Sat, 29 Oct 2005 15:50:11 -0400 Subject: [Python-checkins] commit of r41352 - in python/trunk: . Lib Lib/distutils Lib/distutils/command Lib/encodings In-Reply-To: <20051029194022.470D61E40B4@bag.python.org> References: <20051029194022.470D61E40B4@bag.python.org> Message-ID: <200510291550.12279.fdrake@acm.org> On Saturday 29 October 2005 15:40, martin.v.loewis at python.org wrote: > Author: martin.v.loewis > Date: Sat Oct 29 21:40:21 2005 > New Revision: 41352 > > Modified: > python/trunk/ (props changed) > python/trunk/.cvsignore ... > Add *.pyc to svn:ignore. > Add libpython*.a to .cvsignore and svn:ignore. Shouldn't we simply remove the .cvsignore files? Subversion doesn't use them, so they'll just end up getting out of sync with the svn:ignore properties. -Fred -- Fred L. Drake, Jr. 
From martin at v.loewis.de Sun Oct 30 00:53:53 2005 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Sun, 30 Oct 2005 00:53:53 +0200 Subject: [Python-checkins] [Python-Dev] commit of r41352 - in python/trunk: . Lib Lib/distutils Lib/distutils/command Lib/encodings In-Reply-To: <200510291550.12279.fdrake@acm.org> References: <20051029194022.470D61E40B4@bag.python.org> <200510291550.12279.fdrake@acm.org> Message-ID: <4363FD81.10403@v.loewis.de> Fred L. Drake, Jr. wrote: > Shouldn't we simply remove the .cvsignore files? Subversion doesn't use them, > so they'll just end up getting out of sync with the svn:ignore properties. That might be reasonable. I just noticed that it is convenient to do svn propset svn:ignore -F .cvsignore . Without a file, I wouldn't know how to edit the property, so I would probably do svn propget svn:ignore . > ignores vim ignores svn propset svn:ignore -F ignores . rm ignores Regards, Martin From noamraph at gmail.com Sun Oct 30 01:32:41 2005 From: noamraph at gmail.com (Noam Raphael) Date: Sun, 30 Oct 2005 01:32:41 +0200 Subject: [Python-checkins] [Python-Dev] commit of r41352 - in python/trunk: . Lib Lib/distutils Lib/distutils/command Lib/encodings In-Reply-To: <4363FD81.10403@v.loewis.de> References: <20051029194022.470D61E40B4@bag.python.org> <200510291550.12279.fdrake@acm.org> <4363FD81.10403@v.loewis.de> Message-ID: > That might be reasonable. I just noticed that it is convenient to do > > svn propset svn:ignore -F .cvsignore . > > Without a file, I wouldn't know how to edit the property, so I would > probably do > > svn propget svn:ignore . > ignores > vim ignores > svn propset svn:ignore -F ignores . > rm ignores > Won't "svn propedit svn:ignore ." do the trick? 
Noam From pinard at iro.umontreal.ca Sun Oct 30 02:16:11 2005 From: pinard at iro.umontreal.ca (=?iso-8859-1?Q?Fran=E7ois?= Pinard) Date: Sat, 29 Oct 2005 20:16:11 -0400 Subject: [Python-checkins] [Python-Dev] commit of r41352 - in python/trunk: . Lib Lib/distutils Lib/distutils/command Lib/encodings In-Reply-To: <4363FD81.10403@v.loewis.de> References: <20051029194022.470D61E40B4@bag.python.org> <200510291550.12279.fdrake@acm.org> <4363FD81.10403@v.loewis.de> Message-ID: <20051030001611.GA22474@phenix.sram.qc.ca> [Martin von Löwis] >Without a file, I wouldn't know how to edit the property, so I would >probably do >svn propget svn:ignore . > ignores >vim ignores >svn propset svn:ignore -F ignores . >rm ignores You can use `svn propedit' (or `svn pe'). -- François Pinard http://pinard.progiciels-bpi.ca From tim.peters at python.org Sun Oct 30 02:15:39 2005 From: tim.peters at python.org (tim.peters@python.org) Date: Sun, 30 Oct 2005 02:15:39 +0100 (CET) Subject: [Python-checkins] commit of r41353 - python/trunk/Lib/test Message-ID: <20051030011539.5CFBD1E4009@bag.python.org> Author: tim.peters Date: Sun Oct 30 02:15:38 2005 New Revision: 41353 Modified: python/trunk/Lib/test/test_cmd_line.py Log: test_directories(): This test had no chance of passing on Windows. Hacked it to pass, but not sure it's worth the bother. Modified: python/trunk/Lib/test/test_cmd_line.py ============================================================================== --- python/trunk/Lib/test/test_cmd_line.py (original) +++ python/trunk/Lib/test/test_cmd_line.py Sun Oct 30 02:15:38 2005 @@ -12,8 +12,20 @@ return data def test_directories(self): - self.assertTrue('is a directory' in self.start_python('.')) - self.assertTrue('is a directory' in self.start_python('< .')) + # Does this test make sense? The message for "< ." may depend on + # the command shell, and the message for "." depends on the OS. + if sys.platform.startswith("win"): + # On WinXP w/ cmd.exe, + # "< ."
gives "Access is denied.\n" + # "." gives "C:\\Code\\python\\PCbuild\\python.exe: " + + # "can't open file '.':" + + # "[Errno 13] Permission denied\n" + lookfor = " denied" # common to both cases + else: + # This is what the test looked for originally, on all platforms. + lookfor = "is a directory" + self.assertTrue(lookfor in self.start_python('.')) + self.assertTrue(lookfor in self.start_python('< .')) def verify_valid_flag(self, cmd_line): data = self.start_python(cmd_line) From hyeshik.chang at python.org Sun Oct 30 04:05:28 2005 From: hyeshik.chang at python.org (hyeshik.chang@python.org) Date: Sun, 30 Oct 2005 04:05:28 +0100 (CET) Subject: [Python-checkins] commit of r41354 - in python: branches/release24-maint/Doc/lib trunk/Doc/lib Message-ID: <20051030030528.248421E4034@bag.python.org> Author: hyeshik.chang Date: Sun Oct 30 04:05:27 2005 New Revision: 41354 Modified: python/branches/release24-maint/Doc/lib/xmldomminidom.tex python/trunk/Doc/lib/xmldomminidom.tex Log: SF Bug #1341934: Fix a representation of "\n" to use a proper tag. Modified: python/branches/release24-maint/Doc/lib/xmldomminidom.tex ============================================================================== --- python/branches/release24-maint/Doc/lib/xmldomminidom.tex (original) +++ python/branches/release24-maint/Doc/lib/xmldomminidom.tex Sun Oct 30 04:05:27 2005 @@ -175,7 +175,7 @@ \begin{methoddesc}[Node]{toprettyxml}{\optional{indent\optional{, newl}}} Return a pretty-printed version of the document. \var{indent} specifies the indentation string and defaults to a tabulator; \var{newl} specifies -the string emitted at the end of each line and defaults to \\n. +the string emitted at the end of each line and defaults to \code{\e n}. 
\versionadded{2.1} \versionchanged[the encoding argument; see \method{toxml()}]{2.3} Modified: python/trunk/Doc/lib/xmldomminidom.tex ============================================================================== --- python/trunk/Doc/lib/xmldomminidom.tex (original) +++ python/trunk/Doc/lib/xmldomminidom.tex Sun Oct 30 04:05:27 2005 @@ -175,7 +175,7 @@ \begin{methoddesc}[Node]{toprettyxml}{\optional{indent\optional{, newl}}} Return a pretty-printed version of the document. \var{indent} specifies the indentation string and defaults to a tabulator; \var{newl} specifies -the string emitted at the end of each line and defaults to \\n. +the string emitted at the end of each line and defaults to \code{\e n}. \versionadded{2.1} \versionchanged[the encoding argument; see \method{toxml()}]{2.3} From fred.drake at python.org Sun Oct 30 05:29:49 2005 From: fred.drake at python.org (fred.drake@python.org) Date: Sun, 30 Oct 2005 05:29:49 +0100 (CET) Subject: [Python-checkins] commit of r41355 - python/trunk/Doc/ref Message-ID: <20051030042949.D72AA1E4009@bag.python.org> Author: fred.drake Date: Sun Oct 30 05:29:49 2005 New Revision: 41355 Modified: python/trunk/Doc/ref/ref3.tex Log: add missing "and" Modified: python/trunk/Doc/ref/ref3.tex ============================================================================== --- python/trunk/Doc/ref/ref3.tex (original) +++ python/trunk/Doc/ref/ref3.tex Sun Oct 30 05:29:49 2005 @@ -410,8 +410,9 @@ Displays''). The extension modules \module{dbm}\refstmodindex{dbm}, -\module{gdbm}\refstmodindex{gdbm}, \module{bsddb}\refstmodindex{bsddb} -provide additional examples of mapping types. +\module{gdbm}\refstmodindex{gdbm}, and +\module{bsddb}\refstmodindex{bsddb} provide additional examples of +mapping types. 
\end{description} % Mapping types From fred.drake at python.org Sun Oct 30 05:44:34 2005 From: fred.drake at python.org (fred.drake@python.org) Date: Sun, 30 Oct 2005 05:44:34 +0100 (CET) Subject: [Python-checkins] commit of r41356 - python/branches/release24-maint/Doc/ref Message-ID: <20051030044434.B96D31E4009@bag.python.org> Author: fred.drake Date: Sun Oct 30 05:44:34 2005 New Revision: 41356 Modified: python/branches/release24-maint/Doc/ref/ref3.tex Log: add missing "and" Modified: python/branches/release24-maint/Doc/ref/ref3.tex ============================================================================== --- python/branches/release24-maint/Doc/ref/ref3.tex (original) +++ python/branches/release24-maint/Doc/ref/ref3.tex Sun Oct 30 05:44:34 2005 @@ -410,8 +410,9 @@ Displays''). The extension modules \module{dbm}\refstmodindex{dbm}, -\module{gdbm}\refstmodindex{gdbm}, \module{bsddb}\refstmodindex{bsddb} -provide additional examples of mapping types. +\module{gdbm}\refstmodindex{gdbm}, and +\module{bsddb}\refstmodindex{bsddb} provide additional examples of +mapping types. \end{description} % Mapping types From martin at v.loewis.de Sun Oct 30 12:06:15 2005 From: martin at v.loewis.de (=?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=) Date: Sun, 30 Oct 2005 12:06:15 +0100 Subject: [Python-checkins] [Python-Dev] commit of r41352 - in python/trunk: . Lib Lib/distutils Lib/distutils/command Lib/encodings In-Reply-To: References: <20051029194022.470D61E40B4@bag.python.org> <200510291550.12279.fdrake@acm.org> <4363FD81.10403@v.loewis.de> Message-ID: <4364A927.5040209@v.loewis.de> Noam Raphael wrote: > Won't "svn propedit svn:ignore ." do the trick? It certainly would. Thanks for pointing that out. Regards, Martin From martin.v.loewis at python.org Sun Oct 30 23:01:43 2005 From: martin.v.loewis at python.org (martin.v.loewis@python.org) Date: Sun, 30 Oct 2005 23:01:43 +0100 (CET) Subject: [Python-checkins] commit of r41357 - in python/trunk: . 
Demo/embed Doc Doc/api Doc/commontex Doc/dist Doc/ext Doc/html Doc/info Doc/isilo Doc/lib Doc/mac Doc/paper-a4 Doc/paper-letter Doc/ref Doc/tut Grammar Mac/OSX Mac/OSX/PythonLauncher Mac/OSX/PythonLauncher/PythonLauncher.pbproj Modules Objects PC PC/VC6 PC/bdist_wininst PC/example_nt PCbuild Parser Python Tools/freeze Message-ID: <20051030220143.34C731E4009@bag.python.org> Author: martin.v.loewis Date: Sun Oct 30 23:01:41 2005 New Revision: 41357 Removed: python/trunk/.cvsignore python/trunk/Demo/embed/.cvsignore python/trunk/Doc/.cvsignore python/trunk/Doc/api/.cvsignore python/trunk/Doc/commontex/.cvsignore python/trunk/Doc/dist/.cvsignore python/trunk/Doc/ext/.cvsignore python/trunk/Doc/html/.cvsignore python/trunk/Doc/info/.cvsignore python/trunk/Doc/isilo/.cvsignore python/trunk/Doc/lib/.cvsignore python/trunk/Doc/mac/.cvsignore python/trunk/Doc/paper-a4/.cvsignore python/trunk/Doc/paper-letter/.cvsignore python/trunk/Doc/ref/.cvsignore python/trunk/Doc/tut/.cvsignore python/trunk/Grammar/.cvsignore python/trunk/Mac/OSX/.cvsignore python/trunk/Mac/OSX/PythonLauncher/.cvsignore python/trunk/Mac/OSX/PythonLauncher/PythonLauncher.pbproj/.cvsignore python/trunk/Modules/.cvsignore python/trunk/Objects/.cvsignore python/trunk/PC/.cvsignore python/trunk/PC/VC6/.cvsignore python/trunk/PC/bdist_wininst/.cvsignore python/trunk/PC/example_nt/.cvsignore python/trunk/PCbuild/.cvsignore python/trunk/Parser/.cvsignore python/trunk/Python/.cvsignore python/trunk/Tools/freeze/.cvsignore Log: Remove .cvsignore files, as they live in svn:ignore properties now. 
Deleted: /python/trunk/.cvsignore ============================================================================== --- /python/trunk/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,12 +0,0 @@ -.purify -config.log -config.cache -config.status -Makefile -buildno -python -build -Makefile.pre -platform -pyconfig.h -libpython*.a Deleted: /python/trunk/Demo/embed/.cvsignore ============================================================================== --- /python/trunk/Demo/embed/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -demo -loop -importexc Deleted: /python/trunk/Doc/.cvsignore ============================================================================== --- /python/trunk/Doc/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,6 +0,0 @@ -*.tgz -*.tar.bz2 -*.zip -*.tar -pkglist.html -.doctype Deleted: /python/trunk/Doc/api/.cvsignore ============================================================================== --- /python/trunk/Doc/api/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -*.esis -*.esis1 -*.xml Deleted: /python/trunk/Doc/commontex/.cvsignore ============================================================================== --- /python/trunk/Doc/commontex/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1 +0,0 @@ -patchlevel.tex Deleted: /python/trunk/Doc/dist/.cvsignore ============================================================================== --- /python/trunk/Doc/dist/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -*.esis -*.esis1 -*.xml Deleted: /python/trunk/Doc/ext/.cvsignore ============================================================================== --- /python/trunk/Doc/ext/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -*.esis -*.esis1 -*.xml Deleted: /python/trunk/Doc/html/.cvsignore ============================================================================== --- /python/trunk/Doc/html/.cvsignore Sun Oct 30 23:01:41 2005 +++ 
(empty file) @@ -1,14 +0,0 @@ -api -doc -ext -lib -mac -ref -tut -dist -inst -whatsnew -acks.html -index.html -modindex.html - at webchecker.pickle Deleted: /python/trunk/Doc/info/.cvsignore ============================================================================== --- /python/trunk/Doc/info/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,2 +0,0 @@ -*.info* -*.texi Deleted: /python/trunk/Doc/isilo/.cvsignore ============================================================================== --- /python/trunk/Doc/isilo/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,11 +0,0 @@ -api -doc -ext -lib -mac -ref -tut -dist -inst -whatsnew -python-*.pdb Deleted: /python/trunk/Doc/lib/.cvsignore ============================================================================== --- /python/trunk/Doc/lib/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -*.esis -*.esis1 -*.xml Deleted: /python/trunk/Doc/mac/.cvsignore ============================================================================== --- /python/trunk/Doc/mac/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -*.esis -*.esis1 -*.xml Deleted: /python/trunk/Doc/paper-a4/.cvsignore ============================================================================== --- /python/trunk/Doc/paper-a4/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,16 +0,0 @@ -*.ind -*.log -*.aux -*.dvi -*.toc -*.ps -*.idx -*.ilg -*.pdf -*.bkm -*.syn -*.pla -*.l2h -*.how -README -*.tex Deleted: /python/trunk/Doc/paper-letter/.cvsignore ============================================================================== --- /python/trunk/Doc/paper-letter/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,16 +0,0 @@ -*.ind -*.log -*.aux -*.dvi -*.toc -*.ps -*.idx -*.ilg -*.pdf -*.bkm -*.syn -*.pla -*.l2h -*.how -README -*.tex Deleted: /python/trunk/Doc/ref/.cvsignore ============================================================================== --- /python/trunk/Doc/ref/.cvsignore 
Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -*.esis -*.esis1 -*.xml Deleted: /python/trunk/Doc/tut/.cvsignore ============================================================================== --- /python/trunk/Doc/tut/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -*.esis -*.esis1 -*.xml Deleted: /python/trunk/Grammar/.cvsignore ============================================================================== --- /python/trunk/Grammar/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,3 +0,0 @@ -graminit.h -graminit.c -Makefile Deleted: /python/trunk/Mac/OSX/.cvsignore ============================================================================== --- /python/trunk/Mac/OSX/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,4 +0,0 @@ -build -build-html -Mac.jaguar.pth -pythonw.sh Deleted: /python/trunk/Mac/OSX/PythonLauncher/.cvsignore ============================================================================== --- /python/trunk/Mac/OSX/PythonLauncher/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1 +0,0 @@ -build Deleted: /python/trunk/Mac/OSX/PythonLauncher/PythonLauncher.pbproj/.cvsignore ============================================================================== --- /python/trunk/Mac/OSX/PythonLauncher/PythonLauncher.pbproj/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1 +0,0 @@ -*.pbxuser Deleted: /python/trunk/Modules/.cvsignore ============================================================================== --- /python/trunk/Modules/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,9 +0,0 @@ -Setup -Makefile.pre -Setup.thread -Setup.config -Setup.local -hassignal -config.c -Makefile -add2lib Deleted: /python/trunk/Objects/.cvsignore ============================================================================== --- /python/trunk/Objects/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,2 +0,0 @@ -add2lib -Makefile Deleted: /python/trunk/PC/.cvsignore 
============================================================================== --- /python/trunk/PC/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,4 +0,0 @@ -python_nt.h -python_nt_d.h -pythonnt_rc.h -pythonnt_rc_d.h Deleted: /python/trunk/PC/VC6/.cvsignore ============================================================================== --- /python/trunk/PC/VC6/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,14 +0,0 @@ -*.WSM -*.bsc -*.dbg -*.dll -*.exe -*.exp -*.ilk -*.lib -*.ncb -*.opt -*.pdb -*.plg -*.pyd -*-temp-* Deleted: /python/trunk/PC/bdist_wininst/.cvsignore ============================================================================== --- /python/trunk/PC/bdist_wininst/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,8 +0,0 @@ -temp-debug -temp-release -wininst-7.1.ncb -wininst-7.1.suo -wininst.ncb -wininst.opt -wininst.pdb -wininst.plg Deleted: /python/trunk/PC/example_nt/.cvsignore ============================================================================== --- /python/trunk/PC/example_nt/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,5 +0,0 @@ -? example.ncb -? release -? debug -? .c -? 
example.mdp Deleted: /python/trunk/PCbuild/.cvsignore ============================================================================== --- /python/trunk/PCbuild/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,29 +0,0 @@ -*.WSM -*.bsc -*.dbg -*.dll -*.exe -*.exp -*.ilk -*.lib -*.ncb -*.opt -*.pdb -*.plg -*.pyd -*-temp-* -*.suo -NormalizationTest-3.2.0.txt -JOHAB.TXT -EUC-KR.TXT -EUC-JP.TXT -EUC-JISX0213.TXT -EUC-CN.TXT -CP950.TXT -CP949.TXT -CP936.TXT -CP932.TXT -BIG5HKSCS.TXT -BIG5.TXT -SHIFT_JISX0213.TXT -SHIFTJIS.TXT Deleted: /python/trunk/Parser/.cvsignore ============================================================================== --- /python/trunk/Parser/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,6 +0,0 @@ -Makefile -pgen -add2lib -asdl.pyc -asdl_c.pyc -spark.pyc Deleted: /python/trunk/Python/.cvsignore ============================================================================== --- /python/trunk/Python/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,2 +0,0 @@ -Makefile -add2lib Deleted: /python/trunk/Tools/freeze/.cvsignore ============================================================================== --- /python/trunk/Tools/freeze/.cvsignore Sun Oct 30 23:01:41 2005 +++ (empty file) @@ -1,6 +0,0 @@ -M_*.c -*.o -Makefile -config.c -frozen.c -hello From vinay.sajip at python.org Mon Oct 31 14:14:20 2005 From: vinay.sajip at python.org (vinay.sajip@python.org) Date: Mon, 31 Oct 2005 14:14:20 +0100 (CET) Subject: [Python-checkins] commit of r41358 - python/trunk/Lib/logging Message-ID: <20051031131420.9AFFC1E4009@bag.python.org> Author: vinay.sajip Date: Mon Oct 31 14:14:19 2005 New Revision: 41358 Modified: python/trunk/Lib/logging/__init__.py python/trunk/Lib/logging/handlers.py Log: Exception handling now raises KeyboardInterrupt and SystemExit rather than passing to handleError Modified: python/trunk/Lib/logging/__init__.py ============================================================================== --- 
python/trunk/Lib/logging/__init__.py (original) +++ python/trunk/Lib/logging/__init__.py Mon Oct 31 14:14:19 2005 @@ -738,6 +738,8 @@ except UnicodeError: self.stream.write(fs % msg.encode("UTF-8")) self.flush() + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) Modified: python/trunk/Lib/logging/handlers.py ============================================================================== --- python/trunk/Lib/logging/handlers.py (original) +++ python/trunk/Lib/logging/handlers.py Mon Oct 31 14:14:19 2005 @@ -71,6 +71,8 @@ if self.shouldRollover(record): self.doRollover() logging.FileHandler.emit(self, record) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -418,6 +420,8 @@ try: s = self.makePickle(record) self.send(s) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -639,6 +643,8 @@ self.socket.send(msg) else: self.socket.sendto(msg, self.address) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) From vinay.sajip at python.org Mon Oct 31 15:20:57 2005 From: vinay.sajip at python.org (vinay.sajip@python.org) Date: Mon, 31 Oct 2005 15:20:57 +0100 (CET) Subject: [Python-checkins] commit of r41359 - python/branches/release24-maint/Lib/logging Message-ID: <20051031142057.24AD81E4030@bag.python.org> Author: vinay.sajip Date: Mon Oct 31 15:20:56 2005 New Revision: 41359 Modified: python/branches/release24-maint/Lib/logging/__init__.py Log: Exception handling now raises KeyboardInterrupt and SystemExit rather than passing to handleError Modified: python/branches/release24-maint/Lib/logging/__init__.py ============================================================================== --- python/branches/release24-maint/Lib/logging/__init__.py (original) +++ python/branches/release24-maint/Lib/logging/__init__.py Mon Oct 31 15:20:56 2005 @@ -738,6 +738,8 @@ except UnicodeError: self.stream.write(fs % msg.encode("UTF-8")) self.flush() + except 
(KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) From vinay.sajip at python.org Mon Oct 31 15:27:02 2005 From: vinay.sajip at python.org (vinay.sajip@python.org) Date: Mon, 31 Oct 2005 15:27:02 +0100 (CET) Subject: [Python-checkins] commit of r41360 - python/trunk/Lib/logging Message-ID: <20051031142702.460F21E49BB@bag.python.org> Author: vinay.sajip Date: Mon Oct 31 15:27:01 2005 New Revision: 41360 Modified: python/trunk/Lib/logging/handlers.py Log: Exception handling now raises KeyboardInterrupt and SystemExit rather than passing to handleError Modified: python/trunk/Lib/logging/handlers.py ============================================================================== --- python/trunk/Lib/logging/handlers.py (original) +++ python/trunk/Lib/logging/handlers.py Mon Oct 31 15:27:01 2005 @@ -725,6 +725,8 @@ formatdate(), msg) smtp.sendmail(self.fromaddr, self.toaddrs, msg) smtp.quit() + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -810,6 +812,8 @@ type = self.getEventType(record) msg = self.format(record) self._welu.ReportEvent(self.appname, id, cat, type, [msg]) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -885,6 +889,8 @@ if self.method == "POST": h.send(data) h.getreply() #can't do anything with the result + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) From vinay.sajip at python.org Mon Oct 31 15:30:37 2005 From: vinay.sajip at python.org (vinay.sajip@python.org) Date: Mon, 31 Oct 2005 15:30:37 +0100 (CET) Subject: [Python-checkins] commit of r41361 - python/branches/release24-maint/Lib/logging Message-ID: <20051031143037.B4A1F1E4247@bag.python.org> Author: vinay.sajip Date: Mon Oct 31 15:30:37 2005 New Revision: 41361 Modified: python/branches/release24-maint/Lib/logging/handlers.py Log: Exception handling now raises KeyboardInterrupt and SystemExit rather than passing to handleError Modified: 
python/branches/release24-maint/Lib/logging/handlers.py ============================================================================== --- python/branches/release24-maint/Lib/logging/handlers.py (original) +++ python/branches/release24-maint/Lib/logging/handlers.py Mon Oct 31 15:30:37 2005 @@ -71,6 +71,8 @@ if self.shouldRollover(record): self.doRollover() logging.FileHandler.emit(self, record) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -418,6 +420,8 @@ try: s = self.makePickle(record) self.send(s) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -639,6 +643,8 @@ self.socket.send(msg) else: self.socket.sendto(msg, self.address) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -719,6 +725,8 @@ formatdate(), msg) smtp.sendmail(self.fromaddr, self.toaddrs, msg) smtp.quit() + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -804,6 +812,8 @@ type = self.getEventType(record) msg = self.format(record) self._welu.ReportEvent(self.appname, id, cat, type, [msg]) + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) @@ -879,6 +889,8 @@ if self.method == "POST": h.send(data) h.getreply() #can't do anything with the result + except (KeyboardInterrupt, SystemExit): + raise except: self.handleError(record) From guido.van.rossum at python.org Mon Oct 31 20:22:43 2005 From: guido.van.rossum at python.org (guido.van.rossum@python.org) Date: Mon, 31 Oct 2005 20:22:43 +0100 (CET) Subject: [Python-checkins] commit of r41362 - peps/trunk Message-ID: <20051031192243.CA2101E4009@bag.python.org> Author: guido.van.rossum Date: Mon Oct 31 20:22:43 2005 New Revision: 41362 Modified: peps/trunk/pep-0352.txt Log: Final tweaks: - fix the __str__, __unicode__ and __repr__ methods - some textual tweaks - add Python 3.0 to the transition plan Modified: peps/trunk/pep-0352.txt 
============================================================================== --- peps/trunk/pep-0352.txt (original) +++ peps/trunk/pep-0352.txt Mon Oct 31 20:22:43 2005 @@ -65,47 +65,46 @@ def __init__(self, *args): """Set 'message' and 'args' attribute""" - self.args = args - self.message = args[0] if args else '' + self.args = args + self.message = args[0] if args else '' def __str__(self): - """Return the str of 'message'""" - return str(self.message - if not self.args - else self.args) + """Return the str of 'message'""" + return str(self.message + if len(self.args) <= 1 + else self.args) def __unicode__(self): - """Return the unicode of 'message'""" - return unicode(self.message - if not self.args - else self.args) + """Return the unicode of 'message'""" + return unicode(self.message + if len(self.args) <= 1 + else self.args) def __repr__(self): - args_repr = (repr(self.message) - if not self.args - else "*%r" % self.args) - return "%s(%s)" % (self.__class__.__name__, args_repr) + if (len(self.args) <= 1): + return "%s(%r)" % (self.__class__.__name__, self.message) + return "%s%r" % (self.__class__.__name__, self.args) def __getitem__(self, index): """Index into arguments passed in during instantiation. - Provided for backwards-compatibility and will be - deprecated. + Provided for backwards-compatibility and will be + deprecated. - """ - return self.args[index] + """ + return self.args[index] -The ``message`` attribute will contain either the argument passed in -at instantiation of the object or the empty string. The attribute is -meant to act as a common location to store any extra information that -is to be passed along with the exception that goes beyond the location -of the exception within the exception hierarchy and the exception's -type. +The ``message`` attribute will contain either the first argument +passed in at instantiation of the object or the empty string if no +arguments were passed in. 
The attribute is meant to act as a common +location to store any extra information that is to be passed along +with the exception that goes beyond the location of the exception +within the exception hierarchy and the exception's type. No restriction is placed upon what may be passed in for ``message``. -This provides backwards-compatibility with how the argument passed -into Exception has no restrictions. +This provides backwards-compatibility with how the arguments passed +into Exception have no restrictions. The ``args`` attribute is deprecated. While allowing multiple arguments to be passed can be helpful, it is in no way essential. It @@ -214,11 +213,14 @@ - deprecate catching exceptions that do not inherit from BaseException - * Python 2.9 - deprecate ``args`` and ``__getitem__`` +* Python 3.0 + + - drop ``args`` and ``__getitem__`` + References ==========